All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] Capture more useful details in error state
@ 2016-01-28 19:01 Arun Siluvery
  2016-01-28 19:01 ` [PATCH 1/5] drm/i915/error: capture execlist state on error Arun Siluvery
                   ` (5 more replies)
  0 siblings, 6 replies; 17+ messages in thread
From: Arun Siluvery @ 2016-01-28 19:01 UTC (permalink / raw)
  To: intel-gfx

A few patches to capture more useful details in the error state - these details
include execlist state, CSB events and their decoded form, and the WA ctx batch
buffer.

Except for the WA ctx batch patch, all of these patches have already been sent
as part of the preemption patch series, but they are independent of it. It may
take a while before the preemption patches are reviewed and merged, and these
patches are useful now, so they have been extracted from that series.

Example output looks like this:

  EXECLIST_STATUS: 0x00000301
  EXECLIST_CTX_ID: 0x00000000
  EXECLIST_CSBPTR: 0x00000505
  EXECLIST_CSB_WR: 5
  EXECLIST_CSB_RD: 5
  EXECLIST_SWL_RD: 5
  EXECLIST_CTX/CSB[0]: 0x000.00000 / 0x00000001 | I->A |      |      |      |      |      |     
  EXECLIST_CTX/CSB[1]: 0x000.00a33 / 0x00000018 |      |      |      | A->I | DONE |      |     
  EXECLIST_CTX/CSB[2]: 0x000.00000 / 0x00000001 | I->A |      |      |      |      |      |     
  EXECLIST_CTX/CSB[3]: 0x000.00a33 / 0x00000018 |      |      |      | A->I | DONE |      |     
  EXECLIST_CTX/CSB[4]: 0x000.00000 / 0x00000001 | I->A |      |      |      |      |      |     
  EXECLIST_CTX/CSB[5]: 0x000.00a33 / 0x00000018 |      |      |      | A->I | DONE |      |     

Arun Siluvery (1):
  drm/i915/error: Capture WA ctx batch in error state

Dave Gordon (4):
  drm/i915/error: capture execlist state on error
  drm/i915/error: capture ringbuffer pointed to by START
  drm/i915/error: report ctx id & desc for each request in the queue
  drm/i915/error: improve CSB reporting

 drivers/gpu/drm/i915/i915_drv.h       |  14 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c | 175 +++++++++++++++++++++++++++++-----
 2 files changed, 165 insertions(+), 24 deletions(-)

-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/5] drm/i915/error: capture execlist state on error
  2016-01-28 19:01 [PATCH 0/5] Capture more useful details in error state Arun Siluvery
@ 2016-01-28 19:01 ` Arun Siluvery
  2016-01-29  7:49   ` Mika Kuoppala
  2016-01-28 19:01 ` [PATCH 2/5] drm/i915/error: capture ringbuffer pointed to by START Arun Siluvery
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Arun Siluvery @ 2016-01-28 19:01 UTC (permalink / raw)
  To: intel-gfx

From: Dave Gordon <david.s.gordon@intel.com>

At present, this captures the execlist status/ctx_id and CSBs, but not the
submission queue.

For: VIZ-2021
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  9 +++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 38 +++++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 01cc982..8b30242 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -552,6 +552,15 @@ struct drm_i915_error_state {
 		u32 rc_psmi; /* sleep state */
 		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
 
+		/* Execlists */
+		u32 execlist_status;
+		u32 execlist_ctx_id;
+		u32 execlist_csb_raw_pointer;
+		u32 execlist_csb_write_pointer;
+		u32 execlist_csb_read_pointer;
+		u32 execlist_csb[6];
+		u32 execlist_ctx[6];
+
 		struct drm_i915_error_object {
 			int page_count;
 			u64 gtt_offset;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 978c026..bf53c2b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -247,6 +247,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 				  int ring_idx)
 {
 	struct drm_i915_error_ring *ring = &error->ring[ring_idx];
+	int i;
 
 	if (!ring->valid)
 		return;
@@ -288,7 +289,6 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 		err_printf(m, "  GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode);
 
 		if (INTEL_INFO(dev)->gen >= 8) {
-			int i;
 			for (i = 0; i < 4; i++)
 				err_printf(m, "  PDP%d: 0x%016llx\n",
 					   i, ring->vm_info.pdp[i]);
@@ -304,6 +304,17 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  hangcheck: %s [%d]\n",
 		   hangcheck_action_to_str(ring->hangcheck_action),
 		   ring->hangcheck_score);
+
+	err_printf(m, "  EXECLIST_STATUS: 0x%08x\n", ring->execlist_status);
+	err_printf(m, "  EXECLIST_CTX_ID: 0x%08x\n", ring->execlist_ctx_id);
+	err_printf(m, "  EXECLIST_CSBPTR: 0x%08x\n", ring->execlist_csb_raw_pointer);
+	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
+	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
+
+	for (i = 0; i < 6; i++) {
+		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
+		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
+	}
 }
 
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -965,8 +976,27 @@ static void i915_record_ring_state(struct drm_device *dev,
 					I915_READ(GEN8_RING_PDP_LDW(ring, i));
 			}
 	}
-}
 
+	if (i915.enable_execlists) {
+		int i;
+		u32 status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+		u8 write_pointer = status_pointer & 0x07;
+		u8 read_pointer = ring->next_context_status_buffer;
+		if (read_pointer > write_pointer)
+			write_pointer += 6;
+
+		ering->execlist_status = I915_READ(RING_EXECLIST_STATUS_LO(ring));
+		ering->execlist_ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(ring));
+		ering->execlist_csb_raw_pointer = status_pointer;
+		ering->execlist_csb_write_pointer = write_pointer;
+		ering->execlist_csb_read_pointer = read_pointer;
+
+		for (i = 0; i < 6; i++) {
+			ering->execlist_csb[i] = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, i));
+			ering->execlist_ctx[i] = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, i));
+		}
+	}
+}
 
 static void i915_gem_record_active_context(struct intel_engine_cs *ring,
 					   struct drm_i915_error_state *error,
@@ -1252,6 +1282,10 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
 	if (HAS_HW_CONTEXTS(dev))
 		error->ccid = I915_READ(CCID);
 
+	if (HAS_LOGICAL_RING_CONTEXTS(dev)) {
+		// Surely something to capture here ...
+	}
+
 	if (INTEL_INFO(dev)->gen >= 8) {
 		error->ier = I915_READ(GEN8_DE_MISC_IER);
 		for (i = 0; i < 4; i++)
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/5] drm/i915/error: capture ringbuffer pointed to by START
  2016-01-28 19:01 [PATCH 0/5] Capture more useful details in error state Arun Siluvery
  2016-01-28 19:01 ` [PATCH 1/5] drm/i915/error: capture execlist state on error Arun Siluvery
@ 2016-01-28 19:01 ` Arun Siluvery
  2016-01-29 11:47   ` Chris Wilson
  2016-01-28 19:01 ` [PATCH 3/5] drm/i915/error: report ctx id & desc for each request in the queue Arun Siluvery
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Arun Siluvery @ 2016-01-28 19:01 UTC (permalink / raw)
  To: intel-gfx

From: Dave Gordon <david.s.gordon@intel.com>

For: VIZ-2021
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 36 +++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8b30242..8b510fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -565,7 +565,7 @@ struct drm_i915_error_state {
 			int page_count;
 			u64 gtt_offset;
 			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
+		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
 		struct drm_i915_error_request {
 			long jiffies;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index bf53c2b..5c8ec63 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -475,13 +475,20 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
-		if ((obj = error->ring[i].ringbuffer)) {
+		if ((obj = error->ring[i].req_ringbuffer)) {
 			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
 				   dev_priv->ring[i].name,
 				   lower_32_bits(obj->gtt_offset));
 			print_error_obj(m, obj);
 		}
 
+		if ((obj = error->ring[i].hw_ringbuffer)) {
+			err_printf(m, "%s --- HW ringbuffer = 0x%08x\n",
+				   dev_priv->ring[i].name,
+				   lower_32_bits(obj->gtt_offset));
+			print_error_obj(m, obj);
+		}
+
 		if ((obj = error->ring[i].hws_page)) {
 			u64 hws_offset = obj->gtt_offset;
 			u32 *hws_page = &obj->pages[0][0];
@@ -592,7 +599,8 @@ static void i915_error_state_free(struct kref *error_ref)
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		i915_error_object_free(error->ring[i].batchbuffer);
 		i915_error_object_free(error->ring[i].wa_batchbuffer);
-		i915_error_object_free(error->ring[i].ringbuffer);
+		i915_error_object_free(error->ring[i].req_ringbuffer);
+		i915_error_object_free(error->ring[i].hw_ringbuffer);
 		i915_error_object_free(error->ring[i].hws_page);
 		i915_error_object_free(error->ring[i].ctx);
 		kfree(error->ring[i].requests);
@@ -1004,19 +1012,27 @@ static void i915_gem_record_active_context(struct intel_engine_cs *ring,
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_object *obj;
-
-	/* Currently render ring is the only HW context user */
-	if (ring->id != RCS || !error->ccid)
-		return;
+	u64 base;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		if (!i915_gem_obj_ggtt_bound(obj))
 			continue;
 
-		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
-			ering->ctx = i915_error_ggtt_object_create(dev_priv, obj);
-			break;
+		base = i915_gem_obj_ggtt_offset(obj);
+
+		if (base == ering->start) {
+			ering->hw_ringbuffer = i915_error_ggtt_object_create(dev_priv, obj);
+			continue;
 		}
+
+		if (!error->ccid)
+			continue;
+
+		if (i915.enable_execlists)
+			base += LRC_PPHWSP_PN * PAGE_SIZE;
+
+		if (base == (error->ccid & PAGE_MASK))
+			ering->ctx = i915_error_ggtt_object_create(dev_priv, obj);
 	}
 }
 
@@ -1091,7 +1107,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		error->ring[i].cpu_ring_head = rbuf->head;
 		error->ring[i].cpu_ring_tail = rbuf->tail;
 
-		error->ring[i].ringbuffer =
+		error->ring[i].req_ringbuffer =
 			i915_error_ggtt_object_create(dev_priv, rbuf->obj);
 
 		error->ring[i].hws_page =
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/5] drm/i915/error: report ctx id & desc for each request in the queue
  2016-01-28 19:01 [PATCH 0/5] Capture more useful details in error state Arun Siluvery
  2016-01-28 19:01 ` [PATCH 1/5] drm/i915/error: capture execlist state on error Arun Siluvery
  2016-01-28 19:01 ` [PATCH 2/5] drm/i915/error: capture ringbuffer pointed to by START Arun Siluvery
@ 2016-01-28 19:01 ` Arun Siluvery
  2016-01-29  8:17   ` Mika Kuoppala
  2016-01-28 19:01 ` [PATCH 4/5] drm/i915/error: improve CSB reporting Arun Siluvery
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Arun Siluvery @ 2016-01-28 19:01 UTC (permalink / raw)
  To: intel-gfx

From: Dave Gordon <david.s.gordon@intel.com>

Also decode and output CSB entries, in time order

For: VIZ-2021
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  1 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 37 +++++++++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8b510fb..239aaed 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -568,6 +568,7 @@ struct drm_i915_error_state {
 		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
 		struct drm_i915_error_request {
+			uint64_t ctx_desc;
 			long jiffies;
 			u32 seqno;
 			u32 tail;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5c8ec63..a88160c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -311,9 +311,25 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
 	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
 
-	for (i = 0; i < 6; i++) {
-		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
-		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
+#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
+#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
+#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
+#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
+#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
+#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+
+	for (i = 1; i <= 6; ++i) {
+		int n = (ring->execlist_csb_write_pointer + i) % 6;
+		u32 csb = ring->execlist_csb[n];
+		err_printf(m, "  EXECLIST_CTX/CSB[%d]:  0x%08x  0x%08x  ",
+			n, ring->execlist_ctx[n], csb);
+		err_printf(m, "%s %s %s %s %s %s\n",
+			csb & GEN8_CTX_STATUS_IDLE_ACTIVE	? "I->A" : "    ",
+			csb & GEN8_CTX_STATUS_PREEMPTED		? "PRMT" : "    ",
+			csb & GEN8_CTX_STATUS_ELEMENT_SWITCH	? "ELSW" : "    ",
+			csb & GEN8_CTX_STATUS_ACTIVE_IDLE	? "A->I" : "    ",
+			csb & GEN8_CTX_STATUS_COMPLETE		? "DONE" : "    ",
+			csb & GEN8_CTX_STATUS_LITE_RESTORE	? "LITE" : "    ");
 	}
 }
 
@@ -468,10 +484,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->ring[i].name,
 				   error->ring[i].num_requests);
 			for (j = 0; j < error->ring[i].num_requests; j++) {
-				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
-					   error->ring[i].requests[j].seqno,
-					   error->ring[i].requests[j].jiffies,
-					   error->ring[i].requests[j].tail);
+				struct drm_i915_error_request *erq;
+				erq = &error->ring[i].requests[j];
+				err_printf(m, "  seqno 0x%08x, tail 0x%08x, "
+					"emitted %ld, ctx_desc 0x%08x_%08x\n",
+					erq->seqno, erq->tail, erq->jiffies,
+					upper_32_bits(erq->ctx_desc),
+					lower_32_bits(erq->ctx_desc));
 			}
 		}
 
@@ -1130,6 +1149,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
 		count = 0;
 		list_for_each_entry(request, &ring->request_list, list) {
+			struct intel_context *ctx = request->ctx;
 			struct drm_i915_error_request *erq;
 
 			if (count >= error->ring[i].num_requests) {
@@ -1152,8 +1172,9 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			}
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
+			erq->ctx_desc = intel_lr_context_descriptor(ctx, ring);
 			erq->jiffies = request->emitted_jiffies;
+			erq->seqno = request->seqno;
 			erq->tail = request->postfix;
 		}
 	}
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 4/5] drm/i915/error: improve CSB reporting
  2016-01-28 19:01 [PATCH 0/5] Capture more useful details in error state Arun Siluvery
                   ` (2 preceding siblings ...)
  2016-01-28 19:01 ` [PATCH 3/5] drm/i915/error: report ctx id & desc for each request in the queue Arun Siluvery
@ 2016-01-28 19:01 ` Arun Siluvery
  2016-01-28 19:01 ` [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state Arun Siluvery
  2016-01-29 10:59 ` ✗ Fi.CI.BAT: failure for Capture more useful details " Patchwork
  5 siblings, 0 replies; 17+ messages in thread
From: Arun Siluvery @ 2016-01-28 19:01 UTC (permalink / raw)
  To: intel-gfx

From: Dave Gordon <david.s.gordon@intel.com>

v2: add separators for readability

For: VIZ-2021
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> (v2)
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 87 ++++++++++++++++++++++++-----------
 2 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 239aaed..4b199a4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -561,6 +561,8 @@ struct drm_i915_error_state {
 		u32 execlist_csb[6];
 		u32 execlist_ctx[6];
 
+		u64 ctx_desc;
+
 		struct drm_i915_error_object {
 			int page_count;
 			u64 gtt_offset;
@@ -568,7 +570,7 @@ struct drm_i915_error_state {
 		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
 		struct drm_i915_error_request {
-			uint64_t ctx_desc;
+			u64 ctx_desc;
 			long jiffies;
 			u32 seqno;
 			u32 tail;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a88160c..8b1a1c0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -305,31 +305,60 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 		   hangcheck_action_to_str(ring->hangcheck_action),
 		   ring->hangcheck_score);
 
-	err_printf(m, "  EXECLIST_STATUS: 0x%08x\n", ring->execlist_status);
-	err_printf(m, "  EXECLIST_CTX_ID: 0x%08x\n", ring->execlist_ctx_id);
-	err_printf(m, "  EXECLIST_CSBPTR: 0x%08x\n", ring->execlist_csb_raw_pointer);
-	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
-	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
+	{
+		u32 csb_rd = (ring->execlist_csb_raw_pointer >> 8) & 7;
+
+		err_printf(m, "  EXECLIST_STATUS: 0x%08x\n", ring->execlist_status);
+		err_printf(m, "  EXECLIST_CTX_ID: 0x%08x\n", ring->execlist_ctx_id);
+		err_printf(m, "  EXECLIST_CSBPTR: 0x%08x\n", ring->execlist_csb_raw_pointer);
+		err_printf(m, "  EXECLIST_CSB_WR: %d\n", ring->execlist_csb_write_pointer);
+		err_printf(m, "  EXECLIST_CSB_RD: %d\n", csb_rd);
+		err_printf(m, "  EXECLIST_SWL_RD: %d\n", ring->execlist_csb_read_pointer);
+
+		for (i = 1; i <= 6; ++i) {
+			int n = (ring->execlist_csb_write_pointer + i) % 6;
+			u32 ctxid = ring->execlist_ctx[n];
+			u32 csb = ring->execlist_csb[n];
+			u32 tag = 0;
+			char dot = '.';
+			err_printf(m, "  EXECLIST_CTX/CSB[%d]: ", n);
+
+			if (ctxid && i915.enable_guc_submission) {
+				/* GuC CtxID is ring + flags + (lrca >> 12) */
+				tag = ((ring_idx << 9) | 1);
+			}
+			if ((ctxid >> 20) != tag)
+				dot = '?';		/* flag unexpected value */
+			err_printf(m, "0x%03x%c%05x / ",
+				ctxid >> 20, dot, ctxid & 0x000fffff);
 
+/* CSB status bits */
 #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
 #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
 #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
-
-	for (i = 1; i <= 6; ++i) {
-		int n = (ring->execlist_csb_write_pointer + i) % 6;
-		u32 csb = ring->execlist_csb[n];
-		err_printf(m, "  EXECLIST_CTX/CSB[%d]:  0x%08x  0x%08x  ",
-			n, ring->execlist_ctx[n], csb);
-		err_printf(m, "%s %s %s %s %s %s\n",
-			csb & GEN8_CTX_STATUS_IDLE_ACTIVE	? "I->A" : "    ",
-			csb & GEN8_CTX_STATUS_PREEMPTED		? "PRMT" : "    ",
-			csb & GEN8_CTX_STATUS_ELEMENT_SWITCH	? "ELSW" : "    ",
-			csb & GEN8_CTX_STATUS_ACTIVE_IDLE	? "A->I" : "    ",
-			csb & GEN8_CTX_STATUS_COMPLETE		? "DONE" : "    ",
-			csb & GEN8_CTX_STATUS_LITE_RESTORE	? "LITE" : "    ");
+#define GEN8_CTX_STATUS_UNKNOWN		(~0x0000801f)	/* any other */
+
+			err_printf(m, "0x%08x | %s | %s | %s | %s | %s | %s | %s\n",
+				csb,
+				csb & GEN8_CTX_STATUS_IDLE_ACTIVE	? "I->A" : "    ",
+				csb & GEN8_CTX_STATUS_PREEMPTED		? "PRMT" : "    ",
+				csb & GEN8_CTX_STATUS_ELEMENT_SWITCH	? "ELSW" : "    ",
+				csb & GEN8_CTX_STATUS_ACTIVE_IDLE	? "A->I" : "    ",
+				csb & GEN8_CTX_STATUS_COMPLETE		? "DONE" : "    ",
+				csb & GEN8_CTX_STATUS_LITE_RESTORE	? "LITE" : "    ",
+				csb & GEN8_CTX_STATUS_UNKNOWN		? " +? " : "    ");
+
+			if (i != 6) {
+				if (n == csb_rd)
+					err_printf(m, "                  *RD*\n");
+				else if (n == ring->execlist_csb_read_pointer &&
+					 !i915.enable_guc_submission)
+					err_printf(m, "                  *SW*\n");
+			}
+		}
 	}
 }
 
@@ -495,9 +524,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		}
 
 		if ((obj = error->ring[i].req_ringbuffer)) {
-			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
+			err_printf(m, "%s --- ringbuffer = 0x%08x (ctx_desc 0x%08x_%08x)\n",
 				   dev_priv->ring[i].name,
-				   lower_32_bits(obj->gtt_offset));
+				   lower_32_bits(obj->gtt_offset),
+				   upper_32_bits(error->ring[i].ctx_desc),
+				   lower_32_bits(error->ring[i].ctx_desc));
 			print_error_obj(m, obj);
 		}
 
@@ -1009,8 +1040,6 @@ static void i915_record_ring_state(struct drm_device *dev,
 		u32 status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
 		u8 write_pointer = status_pointer & 0x07;
 		u8 read_pointer = ring->next_context_status_buffer;
-		if (read_pointer > write_pointer)
-			write_pointer += 6;
 
 		ering->execlist_status = I915_READ(RING_EXECLIST_STATUS_LO(ring));
 		ering->execlist_ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(ring));
@@ -1060,6 +1089,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_request *request;
+	u64 ctx_desc;
 	int i, count;
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
@@ -1116,16 +1146,19 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			 * for it to be useful (e.g. dump the context being
 			 * executed).
 			 */
-			if (request)
-				rbuf = request->ctx->engine[ring->id].ringbuf;
-			else
-				rbuf = dev_priv->kernel_context->engine[ring->id].ringbuf;
-		} else
+			struct intel_context *ctx = (request ? request->ctx :
+						     dev_priv->kernel_context);
+			ctx_desc = intel_lr_context_descriptor(ctx, ring);
+			rbuf = ctx->engine[ring->id].ringbuf;
+		} else {
+			ctx_desc = 0;
 			rbuf = ring->buffer;
+		}
 
 		error->ring[i].cpu_ring_head = rbuf->head;
 		error->ring[i].cpu_ring_tail = rbuf->tail;
 
+		error->ring[i].ctx_desc = ctx_desc;
 		error->ring[i].req_ringbuffer =
 			i915_error_ggtt_object_create(dev_priv, rbuf->obj);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state
  2016-01-28 19:01 [PATCH 0/5] Capture more useful details in error state Arun Siluvery
                   ` (3 preceding siblings ...)
  2016-01-28 19:01 ` [PATCH 4/5] drm/i915/error: improve CSB reporting Arun Siluvery
@ 2016-01-28 19:01 ` Arun Siluvery
  2016-01-29  7:52   ` Mika Kuoppala
  2016-01-29 10:59 ` ✗ Fi.CI.BAT: failure for Capture more useful details " Patchwork
  5 siblings, 1 reply; 17+ messages in thread
From: Arun Siluvery @ 2016-01-28 19:01 UTC (permalink / raw)
  To: intel-gfx

From Gen8 onwards we apply ctx workarounds using special batch buffers that
execute during save/restore; it is good to have them in the error state.

Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4b199a4..8440c35 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -567,7 +567,7 @@ struct drm_i915_error_state {
 			int page_count;
 			u64 gtt_offset;
 			u32 *pages[0];
-		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
+		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
 
 		struct drm_i915_error_request {
 			u64 ctx_desc;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8b1a1c0..e2c32d4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -561,6 +561,24 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
+		if ((obj = error->ring[i].wa_ctx)) {
+			u64 wa_ctx_offset = obj->gtt_offset;
+			u32 *wa_ctx_page = &obj->pages[0][0];
+
+			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
+				   dev_priv->ring[i].name, wa_ctx_offset);
+			offset = 0;
+			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {
+				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
+					   offset,
+					   wa_ctx_page[elt],
+					   wa_ctx_page[elt+1],
+					   wa_ctx_page[elt+2],
+					   wa_ctx_page[elt+3]);
+				offset += 16;
+			}
+		}
+
 		if ((obj = error->ring[i].ctx)) {
 			err_printf(m, "%s --- HW Context = 0x%08x\n",
 				   dev_priv->ring[i].name,
@@ -654,6 +672,8 @@ static void i915_error_state_free(struct kref *error_ref)
 		i915_error_object_free(error->ring[i].hws_page);
 		i915_error_object_free(error->ring[i].ctx);
 		kfree(error->ring[i].requests);
+		if (i == RCS)
+			i915_error_object_free(error->ring[i].wa_ctx);
 	}
 
 	i915_error_object_free(error->semaphore_obj);
@@ -1165,6 +1185,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		error->ring[i].hws_page =
 			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
 
+		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
+			error->ring[i].wa_ctx =
+				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
+		}
+
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
 		count = 0;
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/5] drm/i915/error: capture execlist state on error
  2016-01-28 19:01 ` [PATCH 1/5] drm/i915/error: capture execlist state on error Arun Siluvery
@ 2016-01-29  7:49   ` Mika Kuoppala
  2016-01-29 11:45     ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Mika Kuoppala @ 2016-01-29  7:49 UTC (permalink / raw)
  To: Arun Siluvery, intel-gfx

Arun Siluvery <arun.siluvery@linux.intel.com> writes:

> From: Dave Gordon <david.s.gordon@intel.com>
>
> At present, execlist status/ctx_id and CSBs, not the submission queue
>
> For: VIZ-2021
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  9 +++++++++
>  drivers/gpu/drm/i915/i915_gpu_error.c | 38 +++++++++++++++++++++++++++++++++--
>  2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 01cc982..8b30242 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -552,6 +552,15 @@ struct drm_i915_error_state {
>  		u32 rc_psmi; /* sleep state */
>  		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
>  
> +		/* Execlists */
> +		u32 execlist_status;
> +		u32 execlist_ctx_id;
> +		u32 execlist_csb_raw_pointer;
> +		u32 execlist_csb_write_pointer;
> +		u32 execlist_csb_read_pointer;
> +		u32 execlist_csb[6];
> +		u32 execlist_ctx[6];
> +
>  		struct drm_i915_error_object {
>  			int page_count;
>  			u64 gtt_offset;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 978c026..bf53c2b 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -247,6 +247,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>  				  int ring_idx)
>  {
>  	struct drm_i915_error_ring *ring = &error->ring[ring_idx];
> +	int i;
>  
>  	if (!ring->valid)
>  		return;
> @@ -288,7 +289,6 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>  		err_printf(m, "  GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode);
>  
>  		if (INTEL_INFO(dev)->gen >= 8) {
> -			int i;
>  			for (i = 0; i < 4; i++)
>  				err_printf(m, "  PDP%d: 0x%016llx\n",
>  					   i, ring->vm_info.pdp[i]);
> @@ -304,6 +304,17 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  hangcheck: %s [%d]\n",
>  		   hangcheck_action_to_str(ring->hangcheck_action),
>  		   ring->hangcheck_score);
> +
> +	err_printf(m, "  EXECLIST_STATUS: 0x%08x\n", ring->execlist_status);
> +	err_printf(m, "  EXECLIST_CTX_ID: 0x%08x\n", ring->execlist_ctx_id);
> +	err_printf(m, "  EXECLIST_CSBPTR: 0x%08x\n", ring->execlist_csb_raw_pointer);
> +	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
> +	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
> +
> +	for (i = 0; i < 6; i++) {
> +		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
> +		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
> +	}

Please output only if i915.enable_execlists.

>  }
>  
>  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
> @@ -965,8 +976,27 @@ static void i915_record_ring_state(struct drm_device *dev,
>  					I915_READ(GEN8_RING_PDP_LDW(ring, i));
>  			}
>  	}
> -}
>  
> +	if (i915.enable_execlists) {
> +		int i;
> +		u32 status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
> +		u8 write_pointer = status_pointer & 0x07;
> +		u8 read_pointer = ring->next_context_status_buffer;
> +		if (read_pointer > write_pointer)
> +			write_pointer += 6;
> +
> +		ering->execlist_status = I915_READ(RING_EXECLIST_STATUS_LO(ring));
> +		ering->execlist_ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(ring));
> +		ering->execlist_csb_raw_pointer = status_pointer;
> +		ering->execlist_csb_write_pointer = write_pointer;
> +		ering->execlist_csb_read_pointer = read_pointer;
> +
> +		for (i = 0; i < 6; i++) {
> +			ering->execlist_csb[i] = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, i));
> +			ering->execlist_ctx[i] = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, i));
> +		}
> +	}
> +}
>  
>  static void i915_gem_record_active_context(struct intel_engine_cs *ring,
>  					   struct drm_i915_error_state *error,
> @@ -1252,6 +1282,10 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
>  	if (HAS_HW_CONTEXTS(dev))
>  		error->ccid = I915_READ(CCID);
>  
> +	if (HAS_LOGICAL_RING_CONTEXTS(dev)) {
> +		// Surely something to capture here ...
> +	}
> +

Nitpick: don't add an empty block just for documentation.

Thanks,
-Mika


>  	if (INTEL_INFO(dev)->gen >= 8) {
>  		error->ier = I915_READ(GEN8_DE_MISC_IER);
>  		for (i = 0; i < 4; i++)
> -- 
> 1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state
  2016-01-28 19:01 ` [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state Arun Siluvery
@ 2016-01-29  7:52   ` Mika Kuoppala
  2016-01-29 10:09     ` Arun Siluvery
  0 siblings, 1 reply; 17+ messages in thread
From: Mika Kuoppala @ 2016-01-29  7:52 UTC (permalink / raw)
  To: Arun Siluvery, intel-gfx

Arun Siluvery <arun.siluvery@linux.intel.com> writes:

> From Gen8 onwards we apply ctx workarounds using special batch buffers that
> execute during save/restore, good to have them in error state.
>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
>  2 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4b199a4..8440c35 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -567,7 +567,7 @@ struct drm_i915_error_state {
>  			int page_count;
>  			u64 gtt_offset;
>  			u32 *pages[0];
> -		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
> +		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
>  
>  		struct drm_i915_error_request {
>  			u64 ctx_desc;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 8b1a1c0..e2c32d4 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -561,6 +561,24 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  			}
>  		}
>  
> +		if ((obj = error->ring[i].wa_ctx)) {
> +			u64 wa_ctx_offset = obj->gtt_offset;
> +			u32 *wa_ctx_page = &obj->pages[0][0];
> +
> +			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
> +				   dev_priv->ring[i].name, wa_ctx_offset);
> +			offset = 0;
> +			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {

PAGE_SIZE/16 ?

Also we have wa_ctx->size. Is there a reason to output past that?

The assumption is that after wa_ctx->size and BB_END, there should
be only zeros. If the concern is that something has corrupted
that space, you could print only the nonzero ones after ctx_size?

Thanks,
-Mika


> +				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> +					   offset,
> +					   wa_ctx_page[elt],
> +					   wa_ctx_page[elt+1],
> +					   wa_ctx_page[elt+2],
> +					   wa_ctx_page[elt+3]);
> +				offset += 16;
> +			}
> +		}
> +
>  		if ((obj = error->ring[i].ctx)) {
>  			err_printf(m, "%s --- HW Context = 0x%08x\n",
>  				   dev_priv->ring[i].name,
> @@ -654,6 +672,8 @@ static void i915_error_state_free(struct kref *error_ref)
>  		i915_error_object_free(error->ring[i].hws_page);
>  		i915_error_object_free(error->ring[i].ctx);
>  		kfree(error->ring[i].requests);
> +		if (i == RCS)
> +			i915_error_object_free(error->ring[i].wa_ctx);
>  	}
>  
>  	i915_error_object_free(error->semaphore_obj);
> @@ -1165,6 +1185,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		error->ring[i].hws_page =
>  			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
>  
> +		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
> +			error->ring[i].wa_ctx =
> +				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
> +		}
> +
>  		i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
>  		count = 0;
> -- 
> 1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/5] drm/i915/error: report ctx id & desc for each request in the queue
  2016-01-28 19:01 ` [PATCH 3/5] drm/i915/error: report ctx id & desc for each request in the queue Arun Siluvery
@ 2016-01-29  8:17   ` Mika Kuoppala
  2016-01-29  9:48     ` Arun Siluvery
  0 siblings, 1 reply; 17+ messages in thread
From: Mika Kuoppala @ 2016-01-29  8:17 UTC (permalink / raw)
  To: Arun Siluvery, intel-gfx

Arun Siluvery <arun.siluvery@linux.intel.com> writes:

> From: Dave Gordon <david.s.gordon@intel.com>
>
> Also decode and output CSB entries, in time order
>

Traditionally we have had the decoding burden in
igt/tools/intel_error_decode.

Is there a reason not to follow that pattern?
-Mika


> For: VIZ-2021
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  1 +
>  drivers/gpu/drm/i915/i915_gpu_error.c | 37 +++++++++++++++++++++++++++--------
>  2 files changed, 30 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8b510fb..239aaed 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -568,6 +568,7 @@ struct drm_i915_error_state {
>  		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>  
>  		struct drm_i915_error_request {
> +			uint64_t ctx_desc;
>  			long jiffies;
>  			u32 seqno;
>  			u32 tail;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 5c8ec63..a88160c 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -311,9 +311,25 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
>  	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
>  
> -	for (i = 0; i < 6; i++) {
> -		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
> -		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
> +#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
> +#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
> +#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
> +#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
> +#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
> +#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
> +
> +	for (i = 1; i <= 6; ++i) {
> +		int n = (ring->execlist_csb_write_pointer + i) % 6;
> +		u32 csb = ring->execlist_csb[n];
> +		err_printf(m, "  EXECLIST_CTX/CSB[%d]:  0x%08x  0x%08x  ",
> +			n, ring->execlist_ctx[n], csb);
> +		err_printf(m, "%s %s %s %s %s %s\n",
> +			csb & GEN8_CTX_STATUS_IDLE_ACTIVE	? "I->A" : "    ",
> +			csb & GEN8_CTX_STATUS_PREEMPTED		? "PRMT" : "    ",
> +			csb & GEN8_CTX_STATUS_ELEMENT_SWITCH	? "ELSW" : "    ",
> +			csb & GEN8_CTX_STATUS_ACTIVE_IDLE	? "A->I" : "    ",
> +			csb & GEN8_CTX_STATUS_COMPLETE		? "DONE" : "    ",
> +			csb & GEN8_CTX_STATUS_LITE_RESTORE	? "LITE" : "    ");
>  	}
>  }
>  
> @@ -468,10 +484,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  				   dev_priv->ring[i].name,
>  				   error->ring[i].num_requests);
>  			for (j = 0; j < error->ring[i].num_requests; j++) {
> -				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
> -					   error->ring[i].requests[j].seqno,
> -					   error->ring[i].requests[j].jiffies,
> -					   error->ring[i].requests[j].tail);
> +				struct drm_i915_error_request *erq;
> +				erq = &error->ring[i].requests[j];
> +				err_printf(m, "  seqno 0x%08x, tail 0x%08x, "
> +					"emitted %ld, ctx_desc 0x%08x_%08x\n",
> +					erq->seqno, erq->tail, erq->jiffies,
> +					upper_32_bits(erq->ctx_desc),
> +					lower_32_bits(erq->ctx_desc));
>  			}
>  		}
>  
> @@ -1130,6 +1149,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  
>  		count = 0;
>  		list_for_each_entry(request, &ring->request_list, list) {
> +			struct intel_context *ctx = request->ctx;
>  			struct drm_i915_error_request *erq;
>  
>  			if (count >= error->ring[i].num_requests) {
> @@ -1152,8 +1172,9 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  			}
>  
>  			erq = &error->ring[i].requests[count++];
> -			erq->seqno = request->seqno;
> +			erq->ctx_desc = intel_lr_context_descriptor(ctx, ring);
>  			erq->jiffies = request->emitted_jiffies;
> +			erq->seqno = request->seqno;
>  			erq->tail = request->postfix;
>  		}
>  	}
> -- 
> 1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/5] drm/i915/error: report ctx id & desc for each request in the queue
  2016-01-29  8:17   ` Mika Kuoppala
@ 2016-01-29  9:48     ` Arun Siluvery
  0 siblings, 0 replies; 17+ messages in thread
From: Arun Siluvery @ 2016-01-29  9:48 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

On 29/01/2016 08:17, Mika Kuoppala wrote:
> Arun Siluvery <arun.siluvery@linux.intel.com> writes:
>
>> From: Dave Gordon <david.s.gordon@intel.com>
>>
>> Also decode and output CSB entries, in time order
>>
>
> Traditionally we have had the decoding burden in
> igt/tools/intel_error_decode.
>
> Is there reason not to follow that pattern?

I have not used error_decode much; most of the time it is easier to just
have all the details in the error state itself instead of running
another tool to decode it. When I last used it, it still skipped most of
the unknown commands, and that will remain the case unless it is
completely integrated with the spec. We can also extend the tool with
these changes, but I think it helps to have this decoded info in the
error state.

regards
Arun

> -Mika
>
>
>> For: VIZ-2021
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h       |  1 +
>>   drivers/gpu/drm/i915/i915_gpu_error.c | 37 +++++++++++++++++++++++++++--------
>>   2 files changed, 30 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 8b510fb..239aaed 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -568,6 +568,7 @@ struct drm_i915_error_state {
>>   		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>>
>>   		struct drm_i915_error_request {
>> +			uint64_t ctx_desc;
>>   			long jiffies;
>>   			u32 seqno;
>>   			u32 tail;
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 5c8ec63..a88160c 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -311,9 +311,25 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>>   	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
>>   	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
>>
>> -	for (i = 0; i < 6; i++) {
>> -		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
>> -		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
>> +#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
>> +#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
>> +#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
>> +#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
>> +#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
>> +#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
>> +
>> +	for (i = 1; i <= 6; ++i) {
>> +		int n = (ring->execlist_csb_write_pointer + i) % 6;
>> +		u32 csb = ring->execlist_csb[n];
>> +		err_printf(m, "  EXECLIST_CTX/CSB[%d]:  0x%08x  0x%08x  ",
>> +			n, ring->execlist_ctx[n], csb);
>> +		err_printf(m, "%s %s %s %s %s %s\n",
>> +			csb & GEN8_CTX_STATUS_IDLE_ACTIVE	? "I->A" : "    ",
>> +			csb & GEN8_CTX_STATUS_PREEMPTED		? "PRMT" : "    ",
>> +			csb & GEN8_CTX_STATUS_ELEMENT_SWITCH	? "ELSW" : "    ",
>> +			csb & GEN8_CTX_STATUS_ACTIVE_IDLE	? "A->I" : "    ",
>> +			csb & GEN8_CTX_STATUS_COMPLETE		? "DONE" : "    ",
>> +			csb & GEN8_CTX_STATUS_LITE_RESTORE	? "LITE" : "    ");
>>   	}
>>   }
>>
>> @@ -468,10 +484,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>>   				   dev_priv->ring[i].name,
>>   				   error->ring[i].num_requests);
>>   			for (j = 0; j < error->ring[i].num_requests; j++) {
>> -				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
>> -					   error->ring[i].requests[j].seqno,
>> -					   error->ring[i].requests[j].jiffies,
>> -					   error->ring[i].requests[j].tail);
>> +				struct drm_i915_error_request *erq;
>> +				erq = &error->ring[i].requests[j];
>> +				err_printf(m, "  seqno 0x%08x, tail 0x%08x, "
>> +					"emitted %ld, ctx_desc 0x%08x_%08x\n",
>> +					erq->seqno, erq->tail, erq->jiffies,
>> +					upper_32_bits(erq->ctx_desc),
>> +					lower_32_bits(erq->ctx_desc));
>>   			}
>>   		}
>>
>> @@ -1130,6 +1149,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>
>>   		count = 0;
>>   		list_for_each_entry(request, &ring->request_list, list) {
>> +			struct intel_context *ctx = request->ctx;
>>   			struct drm_i915_error_request *erq;
>>
>>   			if (count >= error->ring[i].num_requests) {
>> @@ -1152,8 +1172,9 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>   			}
>>
>>   			erq = &error->ring[i].requests[count++];
>> -			erq->seqno = request->seqno;
>> +			erq->ctx_desc = intel_lr_context_descriptor(ctx, ring);
>>   			erq->jiffies = request->emitted_jiffies;
>> +			erq->seqno = request->seqno;
>>   			erq->tail = request->postfix;
>>   		}
>>   	}
>> --
>> 1.9.1
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state
  2016-01-29  7:52   ` Mika Kuoppala
@ 2016-01-29 10:09     ` Arun Siluvery
  0 siblings, 0 replies; 17+ messages in thread
From: Arun Siluvery @ 2016-01-29 10:09 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

On 29/01/2016 07:52, Mika Kuoppala wrote:
> Arun Siluvery <arun.siluvery@linux.intel.com> writes:
>
>>  From Gen8 onwards we apply ctx workarounds using special batch buffers that
>> execute during save/restore, good to have them in error state.
>>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h       |  2 +-
>>   drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
>>   2 files changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 4b199a4..8440c35 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -567,7 +567,7 @@ struct drm_i915_error_state {
>>   			int page_count;
>>   			u64 gtt_offset;
>>   			u32 *pages[0];
>> -		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>> +		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
>>
>>   		struct drm_i915_error_request {
>>   			u64 ctx_desc;
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 8b1a1c0..e2c32d4 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -561,6 +561,24 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>>   			}
>>   		}
>>
>> +		if ((obj = error->ring[i].wa_ctx)) {
>> +			u64 wa_ctx_offset = obj->gtt_offset;
>> +			u32 *wa_ctx_page = &obj->pages[0][0];
>> +
>> +			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
>> +				   dev_priv->ring[i].name, wa_ctx_offset);
>> +			offset = 0;
>> +			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {
>
> PAGE_SIZE/16 ?
>
> Also we have wa_ctx->size. Is there a reason to output past that?

No reason. Note that wa_ctx->size is not the total size; it is the size
of one WA batch, although we can easily get the total size by combining
all (two) of them.
>
> Assumption is that after wa_ctx->size and BB_END, there should
> be zeros only. If it is a concern that something has corrupted
> that space, you could print only nonzero ones after ctx_size?
There is no concern about it getting corrupted; I will update the patch
to use wa_ctx->size and print only that many values.

regards
Arun

>
> Thanks,
> -Mika
>
>
>> +				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
>> +					   offset,
>> +					   wa_ctx_page[elt],
>> +					   wa_ctx_page[elt+1],
>> +					   wa_ctx_page[elt+2],
>> +					   wa_ctx_page[elt+3]);
>> +				offset += 16;
>> +			}
>> +		}
>> +
>>   		if ((obj = error->ring[i].ctx)) {
>>   			err_printf(m, "%s --- HW Context = 0x%08x\n",
>>   				   dev_priv->ring[i].name,
>> @@ -654,6 +672,8 @@ static void i915_error_state_free(struct kref *error_ref)
>>   		i915_error_object_free(error->ring[i].hws_page);
>>   		i915_error_object_free(error->ring[i].ctx);
>>   		kfree(error->ring[i].requests);
>> +		if (i == RCS)
>> +			i915_error_object_free(error->ring[i].wa_ctx);
>>   	}
>>
>>   	i915_error_object_free(error->semaphore_obj);
>> @@ -1165,6 +1185,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>   		error->ring[i].hws_page =
>>   			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
>>
>> +		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
>> +			error->ring[i].wa_ctx =
>> +				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
>> +		}
>> +
>>   		i915_gem_record_active_context(ring, error, &error->ring[i]);
>>
>>   		count = 0;
>> --
>> 1.9.1
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* ✗ Fi.CI.BAT: failure for Capture more useful details in error state
  2016-01-28 19:01 [PATCH 0/5] Capture more useful details in error state Arun Siluvery
                   ` (4 preceding siblings ...)
  2016-01-28 19:01 ` [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state Arun Siluvery
@ 2016-01-29 10:59 ` Patchwork
  5 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2016-01-29 10:59 UTC (permalink / raw)
  To: arun.siluvery; +Cc: intel-gfx

== Summary ==

Series 2906v1 Capture more useful details in error state
http://patchwork.freedesktop.org/api/1.0/series/2906/revisions/1/mbox/

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (ilk-hp8440p)
Test kms_pipe_crc_basic:
        Subgroup nonblocking-crc-pipe-a:
                skip       -> PASS       (byt-nuc)

bdw-nuci7        total:156  pass:147  dwarn:0   dfail:0   fail:0   skip:9  
bdw-ultra        total:159  pass:147  dwarn:0   dfail:0   fail:0   skip:12 
bsw-nuc-2        total:159  pass:129  dwarn:0   dfail:0   fail:0   skip:30 
byt-nuc          total:159  pass:136  dwarn:0   dfail:0   fail:0   skip:23 
hsw-brixbox      total:159  pass:146  dwarn:0   dfail:0   fail:0   skip:13 
hsw-gt2          total:159  pass:149  dwarn:0   dfail:0   fail:0   skip:10 
ilk-hp8440p      total:159  pass:110  dwarn:0   dfail:0   fail:1   skip:48 
ivb-t430s        total:159  pass:145  dwarn:0   dfail:0   fail:0   skip:14 
skl-i5k-2        total:159  pass:144  dwarn:1   dfail:0   fail:0   skip:14 
snb-dellxps      total:159  pass:137  dwarn:0   dfail:0   fail:0   skip:22 
snb-x220t        total:159  pass:137  dwarn:0   dfail:0   fail:1   skip:21 

Results at /archive/results/CI_IGT_test/Patchwork_1315/

5de97b25e5f3c5a63ee243a9d3b22d30792f7d3e drm-intel-nightly: 2016y-01m-29d-07h-32m-09s UTC integration manifest
13736d189a8c3ad2690c86031fc3c1025a1041cf drm/i915/error: Capture WA ctx batch in error state
c962a66938043f0ea9628aa98893a0a30a0dc31c drm/i915/error: improve CSB reporting
1c7ff22d9a0623d2daeb6ecccd2f90160c3522a5 drm/i915/error: report ctx id & desc for each request in the queue
f5b07bcb66c6e5bdedf806e7a975a81df4a8a436 drm/i915/error: capture ringbuffer pointed to by START
9ed89838d15647dff9460ba0c79f42b0c58831c3 drm/i915/error: capture execlist state on error

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/5] drm/i915/error: capture execlist state on error
  2016-01-29  7:49   ` Mika Kuoppala
@ 2016-01-29 11:45     ` Chris Wilson
  2016-01-29 12:25       ` Arun Siluvery
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2016-01-29 11:45 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Fri, Jan 29, 2016 at 09:49:07AM +0200, Mika Kuoppala wrote:
> Arun Siluvery <arun.siluvery@linux.intel.com> writes:
> 
> > From: Dave Gordon <david.s.gordon@intel.com>
> >
> > At present, execlist status/ctx_id and CSBs, not the submission queue
> >
> > For: VIZ-2021
> > Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h       |  9 +++++++++
> >  drivers/gpu/drm/i915/i915_gpu_error.c | 38 +++++++++++++++++++++++++++++++++--
> >  2 files changed, 45 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 01cc982..8b30242 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -552,6 +552,15 @@ struct drm_i915_error_state {
> >  		u32 rc_psmi; /* sleep state */
> >  		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
> >  
> > +		/* Execlists */
> > +		u32 execlist_status;
> > +		u32 execlist_ctx_id;
> > +		u32 execlist_csb_raw_pointer;
> > +		u32 execlist_csb_write_pointer;
> > +		u32 execlist_csb_read_pointer;
> > +		u32 execlist_csb[6];
> > +		u32 execlist_ctx[6];
> > +
> >  		struct drm_i915_error_object {
> >  			int page_count;
> >  			u64 gtt_offset;
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index 978c026..bf53c2b 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -247,6 +247,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >  				  int ring_idx)
> >  {
> >  	struct drm_i915_error_ring *ring = &error->ring[ring_idx];
> > +	int i;
> >  
> >  	if (!ring->valid)
> >  		return;
> > @@ -288,7 +289,6 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >  		err_printf(m, "  GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode);
> >  
> >  		if (INTEL_INFO(dev)->gen >= 8) {
> > -			int i;
> >  			for (i = 0; i < 4; i++)
> >  				err_printf(m, "  PDP%d: 0x%016llx\n",
> >  					   i, ring->vm_info.pdp[i]);
> > @@ -304,6 +304,17 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >  	err_printf(m, "  hangcheck: %s [%d]\n",
> >  		   hangcheck_action_to_str(ring->hangcheck_action),
> >  		   ring->hangcheck_score);
> > +
> > +	err_printf(m, "  EXECLIST_STATUS: 0x%08x\n", ring->execlist_status);
> > +	err_printf(m, "  EXECLIST_CTX_ID: 0x%08x\n", ring->execlist_ctx_id);
> > +	err_printf(m, "  EXECLIST_CSBPTR: 0x%08x\n", ring->execlist_csb_raw_pointer);
> > +	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
> > +	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
> > +
> > +	for (i = 0; i < 6; i++) {
> > +		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
> > +		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
> > +	}
> 
> Please output only if i915.enable_execlists.
> 
> >  }
> >  
> >  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
> > @@ -965,8 +976,27 @@ static void i915_record_ring_state(struct drm_device *dev,
> >  					I915_READ(GEN8_RING_PDP_LDW(ring, i));
> >  			}
> >  	}
> > -}
> >  
> > +	if (i915.enable_execlists) {
> > +		int i;
> > +		u32 status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
> > +		u8 write_pointer = status_pointer & 0x07;
> > +		u8 read_pointer = ring->next_context_status_buffer;
> > +		if (read_pointer > write_pointer)
> > +			write_pointer += 6;
> > +
> > +		ering->execlist_status = I915_READ(RING_EXECLIST_STATUS_LO(ring));
> > +		ering->execlist_ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(ring));
> > +		ering->execlist_csb_raw_pointer = status_pointer;
> > +		ering->execlist_csb_write_pointer = write_pointer;
> > +		ering->execlist_csb_read_pointer = read_pointer;

Just the registers. How do you plan to use these to debug anything?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/5] drm/i915/error: capture ringbuffer pointed to by START
  2016-01-28 19:01 ` [PATCH 2/5] drm/i915/error: capture ringbuffer pointed to by START Arun Siluvery
@ 2016-01-29 11:47   ` Chris Wilson
  2016-02-01 21:30     ` Arun Siluvery
  0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2016-01-29 11:47 UTC (permalink / raw)
  To: Arun Siluvery; +Cc: intel-gfx

On Thu, Jan 28, 2016 at 07:01:21PM +0000, Arun Siluvery wrote:
> From: Dave Gordon <david.s.gordon@intel.com>
> 
> For: VIZ-2021
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>

What information does this actually provide over and above the fact that
the hw is not executing the ring we expect? How have you used this, and
how do you plan to?

As it stands, adding more fragile loops is just increasing the potential
for an OOPS in that code, even more so as we can eliminate the current
dangerous list iteration for extracting the current ctx object.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/5] drm/i915/error: capture execlist state on error
  2016-01-29 11:45     ` Chris Wilson
@ 2016-01-29 12:25       ` Arun Siluvery
  2016-01-29 12:38         ` Chris Wilson
  0 siblings, 1 reply; 17+ messages in thread
From: Arun Siluvery @ 2016-01-29 12:25 UTC (permalink / raw)
  To: Chris Wilson, Mika Kuoppala, intel-gfx, Dave Gordon

On 29/01/2016 11:45, Chris Wilson wrote:
> On Fri, Jan 29, 2016 at 09:49:07AM +0200, Mika Kuoppala wrote:
>> Arun Siluvery <arun.siluvery@linux.intel.com> writes:
>>
>>> From: Dave Gordon <david.s.gordon@intel.com>
>>>
>>> At present, execlist status/ctx_id and CSBs, not the submission queue
>>>
>>> For: VIZ-2021
>>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h       |  9 +++++++++
>>>   drivers/gpu/drm/i915/i915_gpu_error.c | 38 +++++++++++++++++++++++++++++++++--
>>>   2 files changed, 45 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 01cc982..8b30242 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -552,6 +552,15 @@ struct drm_i915_error_state {
>>>   		u32 rc_psmi; /* sleep state */
>>>   		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
>>>
>>> +		/* Execlists */
>>> +		u32 execlist_status;
>>> +		u32 execlist_ctx_id;
>>> +		u32 execlist_csb_raw_pointer;
>>> +		u32 execlist_csb_write_pointer;
>>> +		u32 execlist_csb_read_pointer;
>>> +		u32 execlist_csb[6];
>>> +		u32 execlist_ctx[6];
>>> +
>>>   		struct drm_i915_error_object {
>>>   			int page_count;
>>>   			u64 gtt_offset;
>>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>>> index 978c026..bf53c2b 100644
>>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>>> @@ -247,6 +247,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>>>   				  int ring_idx)
>>>   {
>>>   	struct drm_i915_error_ring *ring = &error->ring[ring_idx];
>>> +	int i;
>>>
>>>   	if (!ring->valid)
>>>   		return;
>>> @@ -288,7 +289,6 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>>>   		err_printf(m, "  GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode);
>>>
>>>   		if (INTEL_INFO(dev)->gen >= 8) {
>>> -			int i;
>>>   			for (i = 0; i < 4; i++)
>>>   				err_printf(m, "  PDP%d: 0x%016llx\n",
>>>   					   i, ring->vm_info.pdp[i]);
>>> @@ -304,6 +304,17 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
>>>   	err_printf(m, "  hangcheck: %s [%d]\n",
>>>   		   hangcheck_action_to_str(ring->hangcheck_action),
>>>   		   ring->hangcheck_score);
>>> +
>>> +	err_printf(m, "  EXECLIST_STATUS: 0x%08x\n", ring->execlist_status);
>>> +	err_printf(m, "  EXECLIST_CTX_ID: 0x%08x\n", ring->execlist_ctx_id);
>>> +	err_printf(m, "  EXECLIST_CSBPTR: 0x%08x\n", ring->execlist_csb_raw_pointer);
>>> +	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
>>> +	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
>>> +
>>> +	for (i = 0; i < 6; i++) {
>>> +		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
>>> +		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
>>> +	}
>>
>> Please output only if i915.enable_execlists.
>>
>>>   }
>>>
>>>   void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
>>> @@ -965,8 +976,27 @@ static void i915_record_ring_state(struct drm_device *dev,
>>>   					I915_READ(GEN8_RING_PDP_LDW(ring, i));
>>>   			}
>>>   	}
>>> -}
>>>
>>> +	if (i915.enable_execlists) {
>>> +		int i;
>>> +		u32 status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
>>> +		u8 write_pointer = status_pointer & 0x07;
>>> +		u8 read_pointer = ring->next_context_status_buffer;
>>> +		if (read_pointer > write_pointer)
>>> +			write_pointer += 6;
>>> +
>>> +		ering->execlist_status = I915_READ(RING_EXECLIST_STATUS_LO(ring));
>>> +		ering->execlist_ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(ring));
>>> +		ering->execlist_csb_raw_pointer = status_pointer;
>>> +		ering->execlist_csb_write_pointer = write_pointer;
>>> +		ering->execlist_csb_read_pointer = read_pointer;
>
> Just the registers. How do you plan to use these to debug anything?
CSB pointers are helpful when there is an inconsistency between sw and
hw, which can happen if we miss ctx switch interrupts.

regards
Arun

> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/5] drm/i915/error: capture execlist state on error
  2016-01-29 12:25       ` Arun Siluvery
@ 2016-01-29 12:38         ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2016-01-29 12:38 UTC (permalink / raw)
  To: Arun Siluvery; +Cc: intel-gfx

On Fri, Jan 29, 2016 at 12:25:02PM +0000, Arun Siluvery wrote:
> On 29/01/2016 11:45, Chris Wilson wrote:
> >On Fri, Jan 29, 2016 at 09:49:07AM +0200, Mika Kuoppala wrote:
> >>Arun Siluvery <arun.siluvery@linux.intel.com> writes:
> >>
> >>>From: Dave Gordon <david.s.gordon@intel.com>
> >>>
> >>>At present, this captures execlist status/ctx_id and CSBs, but not the submission queue
> >>>
> >>>For: VIZ-2021
> >>>Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> >>>---
> >>>  drivers/gpu/drm/i915/i915_drv.h       |  9 +++++++++
> >>>  drivers/gpu/drm/i915/i915_gpu_error.c | 38 +++++++++++++++++++++++++++++++++--
> >>>  2 files changed, 45 insertions(+), 2 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>>index 01cc982..8b30242 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>@@ -552,6 +552,15 @@ struct drm_i915_error_state {
> >>>  		u32 rc_psmi; /* sleep state */
> >>>  		u32 semaphore_mboxes[I915_NUM_RINGS - 1];
> >>>
> >>>+		/* Execlists */
> >>>+		u32 execlist_status;
> >>>+		u32 execlist_ctx_id;
> >>>+		u32 execlist_csb_raw_pointer;
> >>>+		u32 execlist_csb_write_pointer;
> >>>+		u32 execlist_csb_read_pointer;
> >>>+		u32 execlist_csb[6];
> >>>+		u32 execlist_ctx[6];
> >>>+
> >>>  		struct drm_i915_error_object {
> >>>  			int page_count;
> >>>  			u64 gtt_offset;
> >>>diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> >>>index 978c026..bf53c2b 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gpu_error.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> >>>@@ -247,6 +247,7 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >>>  				  int ring_idx)
> >>>  {
> >>>  	struct drm_i915_error_ring *ring = &error->ring[ring_idx];
> >>>+	int i;
> >>>
> >>>  	if (!ring->valid)
> >>>  		return;
> >>>@@ -288,7 +289,6 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >>>  		err_printf(m, "  GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode);
> >>>
> >>>  		if (INTEL_INFO(dev)->gen >= 8) {
> >>>-			int i;
> >>>  			for (i = 0; i < 4; i++)
> >>>  				err_printf(m, "  PDP%d: 0x%016llx\n",
> >>>  					   i, ring->vm_info.pdp[i]);
> >>>@@ -304,6 +304,17 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> >>>  	err_printf(m, "  hangcheck: %s [%d]\n",
> >>>  		   hangcheck_action_to_str(ring->hangcheck_action),
> >>>  		   ring->hangcheck_score);
> >>>+
> >>>+	err_printf(m, "  EXECLIST_STATUS: 0x%08x\n", ring->execlist_status);
> >>>+	err_printf(m, "  EXECLIST_CTX_ID: 0x%08x\n", ring->execlist_ctx_id);
> >>>+	err_printf(m, "  EXECLIST_CSBPTR: 0x%08x\n", ring->execlist_csb_raw_pointer);
> >>>+	err_printf(m, "  EXECLIST_CSB_WR: 0x%08x\n", ring->execlist_csb_write_pointer);
> >>>+	err_printf(m, "  EXECLIST_CSB_RD: 0x%08x\n", ring->execlist_csb_read_pointer);
> >>>+
> >>>+	for (i = 0; i < 6; i++) {
> >>>+		err_printf(m, "  EXECLIST_CSB[%d]: 0x%08x\n", i, ring->execlist_csb[i]);
> >>>+		err_printf(m, "  EXECLIST_CTX[%d]: 0x%08x\n", i, ring->execlist_ctx[i]);
> >>>+	}
> >>
> >>Please output only if i915.enable_execlists.
> >>
> >>>  }
> >>>
> >>>  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
> >>>@@ -965,8 +976,27 @@ static void i915_record_ring_state(struct drm_device *dev,
> >>>  					I915_READ(GEN8_RING_PDP_LDW(ring, i));
> >>>  			}
> >>>  	}
> >>>-}
> >>>
> >>>+	if (i915.enable_execlists) {
> >>>+		int i;
> >>>+		u32 status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
> >>>+		u8 write_pointer = status_pointer & 0x07;
> >>>+		u8 read_pointer = ring->next_context_status_buffer;
> >>>+		if (read_pointer > write_pointer)
> >>>+			write_pointer += 6;
> >>>+
> >>>+		ering->execlist_status = I915_READ(RING_EXECLIST_STATUS_LO(ring));
> >>>+		ering->execlist_ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(ring));
> >>>+		ering->execlist_csb_raw_pointer = status_pointer;
> >>>+		ering->execlist_csb_write_pointer = write_pointer;
> >>>+		ering->execlist_csb_read_pointer = read_pointer;
> >
> >Just the registers. How do you plan to use these to debug anything?
> csb pointers are helpful when there is an inconsistency between sw
> and hw which can happen if we miss ctx switch interrupts.

Yes, but you are printing them twice. (next_context_status_buffer can be
shot btw)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/5] drm/i915/error: capture ringbuffer pointed to by START
  2016-01-29 11:47   ` Chris Wilson
@ 2016-02-01 21:30     ` Arun Siluvery
  0 siblings, 0 replies; 17+ messages in thread
From: Arun Siluvery @ 2016-02-01 21:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Mika Kuoppala, Dave Gordon

On 29/01/2016 11:47, Chris Wilson wrote:
> On Thu, Jan 28, 2016 at 07:01:21PM +0000, Arun Siluvery wrote:
>> From: Dave Gordon <david.s.gordon@intel.com>
>>
>> For: VIZ-2021
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>
> What information does this actually provide over and above the hw is not
> executing the ring we expect? How have you used this, how do you plan
> to?
Most of the time this matches the ringbuffer we dump, but when there
is an inconsistency it helps to know what the hw is actually executing as
opposed to what we expect; otherwise the head and tail values we capture
and the instructions at those offsets don't make sense. Without this we
don't have any idea what the HW was doing or what caused the hang.

regards
Arun

>
> As it stands adding more fragile loops is just increasing the potential
> for an OOPS in that code, even more so as we can eliminate the current
> dangerous list iteration for extracting the current ctx object.
> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2016-02-01 21:30 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-28 19:01 [PATCH 0/5] Capture more useful details in error state Arun Siluvery
2016-01-28 19:01 ` [PATCH 1/5] drm/i915/error: capture execlist state on error Arun Siluvery
2016-01-29  7:49   ` Mika Kuoppala
2016-01-29 11:45     ` Chris Wilson
2016-01-29 12:25       ` Arun Siluvery
2016-01-29 12:38         ` Chris Wilson
2016-01-28 19:01 ` [PATCH 2/5] drm/i915/error: capture ringbuffer pointed to by START Arun Siluvery
2016-01-29 11:47   ` Chris Wilson
2016-02-01 21:30     ` Arun Siluvery
2016-01-28 19:01 ` [PATCH 3/5] drm/i915/error: report ctx id & desc for each request in the queue Arun Siluvery
2016-01-29  8:17   ` Mika Kuoppala
2016-01-29  9:48     ` Arun Siluvery
2016-01-28 19:01 ` [PATCH 4/5] drm/i915/error: improve CSB reporting Arun Siluvery
2016-01-28 19:01 ` [PATCH 5/5] drm/i915/error: Capture WA ctx batch in error state Arun Siluvery
2016-01-29  7:52   ` Mika Kuoppala
2016-01-29 10:09     ` Arun Siluvery
2016-01-29 10:59 ` ✗ Fi.CI.BAT: failure for Capture more useful details " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.