All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
@ 2020-09-15 12:41 Chris Wilson
  2020-09-15 12:41   ` [Intel-gfx] " Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Chris Wilson @ 2020-09-15 12:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

A CSB entry is 64b, and it is simpler for us to treat it as an array of
64b entries than as an array of pairs of 32b entries.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 33 ++++++++++----------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c400aaa2287b..ee6312601c56 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -278,7 +278,7 @@ struct intel_engine_execlists {
 	 *
 	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	u32 *csb_status;
+	u64 *csb_status;
 
 	/**
 	 * @csb_size: context status buffer FIFO size
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0412a44f25f2..d6e0f62337b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2464,7 +2464,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 }
 
 static inline void
-invalidate_csb_entries(const u32 *first, const u32 *last)
+invalidate_csb_entries(const u64 *first, const u64 *last)
 {
 	clflush((void *)first);
 	clflush((void *)last);
@@ -2496,14 +2496,12 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
  *     bits 47-57: sw context id of the lrc the GT switched away from
  *     bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool
-gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u32 lower_dw = csb[0];
-	u32 upper_dw = csb[1];
-	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
-	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	u64 entry = READ_ONCE(*csb);
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	bool new_queue =
+		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2513,7 +2511,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * would require some extra handling, but we don't support that.
 	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!ctx_to_valid);
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
 		return true;
 	}
 
@@ -2522,12 +2520,11 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * context switch on an unsuccessful wait instruction since we always
 	 * use polling mode.
 	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
 	return false;
 }
 
-static inline bool
-gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen8_csb_parse(const u64 *csb)
 {
 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
 }
@@ -2535,7 +2532,7 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u32 * const buf = execlists->csb_status;
+	const u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2616,12 +2613,14 @@ static void process_csb(struct intel_engine_cs *engine)
 		 */
 
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head, buf[2 * head + 0], buf[2 * head + 1]);
+			     head,
+			     upper_32_bits(buf[head]),
+			     lower_32_bits(buf[head]));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(execlists, buf + 2 * head);
+			promote = gen12_csb_parse(buf + head);
 		else
-			promote = gen8_csb_parse(execlists, buf + 2 * head);
+			promote = gen8_csb_parse(buf + head);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
@@ -5157,7 +5156,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	}
 
 	execlists->csb_status =
-		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
+		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
@ 2020-09-15 12:41   ` Chris Wilson
  2020-09-15 12:41 ` [Intel-gfx] [PATCH 3/4] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2020-09-15 12:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Bruce Chang, Mika Kuoppala, stable

On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
we see the GPU report more context-switch entries for us to parse, but
those entries have not been written, leading us to process stale events,
and eventually report a hung GPU.

However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that may be, we can set a reliable
upper bound for how long we should wait for:

    513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: stable@vger.kernel.org # v5.4
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d6e0f62337b4..d75712a503b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  */
 static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u64 entry = READ_ONCE(*csb);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	bool new_queue =
+	bool ctx_away_valid;
+	bool new_queue;
+	u64 entry;
+
+	/* HSD#22011248461 */
+	entry = READ_ONCE(*csb);
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+	WRITE_ONCE(*(u64 *)csb, -1);
+
+	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	new_queue =
 		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
@@ -4004,6 +4017,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	WRITE_ONCE(*execlists->csb_write, reset_value);
 	wmb(); /* Make sure this is visible to HW (paranoia?) */
 
+	/* Check that the GPU does indeed update the CSB entries! */
+	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
 	invalidate_csb_entries(&execlists->csb_status[0],
 			       &execlists->csb_status[reset_value]);
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-gfx] [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-09-15 12:41   ` Chris Wilson
  0 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2020-09-15 12:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable, Chris Wilson

On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
we see the GPU report more context-switch entries for us to parse, but
those entries have not been written, leading us to process stale events,
and eventually report a hung GPU.

However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that may be, we can set a reliable
upper bound for how long we should wait for:

    513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: stable@vger.kernel.org # v5.4
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d6e0f62337b4..d75712a503b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  */
 static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u64 entry = READ_ONCE(*csb);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	bool new_queue =
+	bool ctx_away_valid;
+	bool new_queue;
+	u64 entry;
+
+	/* HSD#22011248461 */
+	entry = READ_ONCE(*csb);
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+	WRITE_ONCE(*(u64 *)csb, -1);
+
+	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	new_queue =
 		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
@@ -4004,6 +4017,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	WRITE_ONCE(*execlists->csb_write, reset_value);
 	wmb(); /* Make sure this is visible to HW (paranoia?) */
 
+	/* Check that the GPU does indeed update the CSB entries! */
+	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
 	invalidate_csb_entries(&execlists->csb_status[0],
 			       &execlists->csb_status[reset_value]);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-gfx] [PATCH 3/4] drm/i915/gt: Apply the CSB w/a for all
  2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
  2020-09-15 12:41   ` [Intel-gfx] " Chris Wilson
@ 2020-09-15 12:41 ` Chris Wilson
  2020-09-15 13:29   ` Mika Kuoppala
  2020-09-15 12:41 ` [Intel-gfx] [PATCH 4/4] drm/i915/gt: Use a mmio read of the CSB in case of failure Chris Wilson
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Chris Wilson @ 2020-09-15 12:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Since we expect to inline the csb_parse() routines, the w/a for the
stale CSB data on Tigerlake will be pulled into process_csb(), and so we
might as well simply reuse the logic for all, and so will hopefully
avoid any strange behaviour on Icelake that was not covered by our
previous w/a.

References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 79 +++++++++++++++++++----------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d75712a503b7..fcb6ec3d55f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  *     bits 47-57: sw context id of the lrc the GT switched away from
  *     bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
 {
-	bool ctx_away_valid;
-	bool new_queue;
-	u64 entry;
-
-	/* HSD#22011248461 */
-	entry = READ_ONCE(*csb);
-	if (unlikely(entry == -1)) {
-		preempt_disable();
-		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-			GEM_WARN_ON("50us CSB timeout");
-		preempt_enable();
-	}
-	WRITE_ONCE(*(u64 *)csb, -1);
-
-	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	new_queue =
-		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+	bool new_queue =
+		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
 	 * would require some extra handling, but we don't support that.
 	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
 		return true;
 	}
 
@@ -2533,19 +2519,56 @@ static inline bool gen12_csb_parse(const u64 *csb)
 	 * context switch on an unsuccessful wait instruction since we always
 	 * use polling mode.
 	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
 	return false;
 }
 
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
+{
+	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static noinline u64 wa_csb_read(u64 * const csb)
 {
-	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+	u64 entry;
+
+	preempt_disable();
+	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+		GEM_WARN_ON("50us CSB timeout");
+	preempt_enable();
+
+	return entry;
+}
+
+static inline u64 csb_read(u64 * const csb)
+{
+	u64 entry = READ_ONCE(*csb);
+
+	/*
+	 * Unfortunately, the GPU does not always serialise its write
+	 * of the CSB entries before its write of the CSB pointer, at least
+	 * from the perspective of the CPU, using what is known as a Global
+	 * Observation Point. We may read a new CSB tail pointer, but then
+	 * read the stale CSB entries, causing us to misinterpret the
+	 * context-switch events, and eventually declare the GPU hung.
+	 *
+	 * icl:HSDES#1806554093
+	 * tgl:HSDES#22011248461
+	 */
+	if (unlikely(entry == -1))
+		entry = wa_csb_read(csb);
+
+	/* Consume this entry so that we can spot its future reuse. */
+	WRITE_ONCE(*csb, -1);
+
+	/* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
+	return entry;
 }
 
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u64 * const buf = execlists->csb_status;
+	u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2603,6 +2626,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 	do {
 		bool promote;
+		u64 csb;
 
 		if (++head == num_entries)
 			head = 0;
@@ -2625,15 +2649,14 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
+		csb = csb_read(buf + head);
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head,
-			     upper_32_bits(buf[head]),
-			     lower_32_bits(buf[head]));
+			     head, upper_32_bits(csb), lower_32_bits(csb));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(buf + head);
+			promote = gen12_csb_parse(csb);
 		else
-			promote = gen8_csb_parse(buf + head);
+			promote = gen8_csb_parse(csb);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-gfx] [PATCH 4/4] drm/i915/gt: Use a mmio read of the CSB in case of failure
  2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
  2020-09-15 12:41   ` [Intel-gfx] " Chris Wilson
  2020-09-15 12:41 ` [Intel-gfx] [PATCH 3/4] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
@ 2020-09-15 12:41 ` Chris Wilson
  2020-09-15 13:39   ` Mika Kuoppala
  2020-09-15 13:56 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Chris Wilson @ 2020-09-15 12:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

If we find the GPU didn't update the CSB within 50us, we currently fail
and eventually reset the GPU. Lets report the value from the mmio space
as a last resort, it may just stave off an unnecessary GPU reset.

Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c     | 26 +++++++++++++++++++------
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h |  3 +++
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index fcb6ec3d55f4..7cf208311539 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2528,19 +2528,33 @@ static inline bool gen8_csb_parse(const u64 csb)
 	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
 }
 
-static noinline u64 wa_csb_read(u64 * const csb)
+static noinline u64
+wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
 {
 	u64 entry;
 
 	preempt_disable();
-	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-		GEM_WARN_ON("50us CSB timeout");
+	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)) {
+		int idx = csb - engine->execlists.csb_status;
+		int status;
+
+		status = GEN8_EXECLISTS_STATUS_BUF;
+		if (idx >= 6) {
+			status = GEN11_EXECLISTS_STATUS_BUF2;
+			idx -= 6;
+		}
+		status += sizeof(u64) * idx;
+
+		entry = intel_uncore_read64(engine->uncore,
+					    _MMIO(engine->mmio_base + status));
+	}
 	preempt_enable();
 
 	return entry;
 }
 
-static inline u64 csb_read(u64 * const csb)
+static inline u64
+csb_read(const struct intel_engine_cs *engine, u64 * const csb)
 {
 	u64 entry = READ_ONCE(*csb);
 
@@ -2556,7 +2570,7 @@ static inline u64 csb_read(u64 * const csb)
 	 * tgl:HSDES#22011248461
 	 */
 	if (unlikely(entry == -1))
-		entry = wa_csb_read(csb);
+		entry = wa_csb_read(engine, csb);
 
 	/* Consume this entry so that we can spot its future reuse. */
 	WRITE_ONCE(*csb, -1);
@@ -2649,7 +2663,7 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		csb = csb_read(buf + head);
+		csb = csb_read(engine, buf + head);
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
 			     head, upper_32_bits(csb), lower_32_bits(csb));
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 93cb6c460508..1b51f7b9a5c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -49,4 +49,7 @@
 #define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x1A
 #define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0xD
 
+#define GEN8_EXECLISTS_STATUS_BUF 0x370
+#define GEN11_EXECLISTS_STATUS_BUF2 0x3c0
+
 #endif /* _INTEL_LRC_REG_H_ */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-09-15 12:41   ` [Intel-gfx] " Chris Wilson
  (?)
@ 2020-09-15 13:19   ` Mika Kuoppala
  -1 siblings, 0 replies; 18+ messages in thread
From: Mika Kuoppala @ 2020-09-15 13:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable, Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> Forcibly evict stale csb entries") where, presumably, due to a missing
> Global Observation Point synchronisation, the write pointer of the CSB
> ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> we see the GPU report more context-switch entries for us to parse, but
> those entries have not been written, leading us to process stale events,
> and eventually report a hung GPU.
>
> However, this effect appears to be much more severe than we previously
> saw on Icelake (though it might be best if we try the same approach
> there as well and measure), and Bruce suggested the good idea of resetting
> the CSB entry after use so that we can detect when it has been updated by
> the GPU. By instrumenting how long that may be, we can set a reliable
> upper bound for how long we should wait for:
>
>     513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
>
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")

References: HSDES#22011327657, HSDES#1508287568

> Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Bruce Chang <yu.bruce.chang@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: stable@vger.kernel.org # v5.4
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index d6e0f62337b4..d75712a503b7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>   */
>  static inline bool gen12_csb_parse(const u64 *csb)
>  {
> -	u64 entry = READ_ONCE(*csb);
> -	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> -	bool new_queue =
> +	bool ctx_away_valid;
> +	bool new_queue;
> +	u64 entry;
> +
> +	/* HSD#22011248461 */
> +	entry = READ_ONCE(*csb);
> +	if (unlikely(entry == -1)) {
> +		preempt_disable();
> +		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> +			GEM_WARN_ON("50us CSB timeout");

hsdes is for 30us. But this will be the threshold to fallback further
into mmio. Intuition and data of rarity of this will help find the
good enough value.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +		preempt_enable();
> +	}
> +	WRITE_ONCE(*(u64 *)csb, -1);
> +
> +	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> +	new_queue =
>  		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>  
>  	/*
> @@ -4004,6 +4017,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
>  	WRITE_ONCE(*execlists->csb_write, reset_value);
>  	wmb(); /* Make sure this is visible to HW (paranoia?) */
>  
> +	/* Check that the GPU does indeed update the CSB entries! */
> +	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
>  	invalidate_csb_entries(&execlists->csb_status[0],
>  			       &execlists->csb_status[reset_value]);
>  
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 3/4] drm/i915/gt: Apply the CSB w/a for all
  2020-09-15 12:41 ` [Intel-gfx] [PATCH 3/4] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
@ 2020-09-15 13:29   ` Mika Kuoppala
  0 siblings, 0 replies; 18+ messages in thread
From: Mika Kuoppala @ 2020-09-15 13:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we expect to inline the csb_parse() routines, the w/a for the
> stale CSB data on Tigerlake will be pulled into process_csb(), and so we
> might as well simply reuse the logic for all, and so will hopefully
> avoid any strange behaviour on Icelake that was not covered by our
> previous w/a.
>
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Bruce Chang <yu.bruce.chang@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 79 +++++++++++++++++++----------
>  1 file changed, 51 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index d75712a503b7..fcb6ec3d55f4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>   *     bits 47-57: sw context id of the lrc the GT switched away from
>   *     bits 58-63: sw counter of the lrc the GT switched away from
>   */
> -static inline bool gen12_csb_parse(const u64 *csb)
> +static inline bool gen12_csb_parse(const u64 csb)
>  {
> -	bool ctx_away_valid;
> -	bool new_queue;
> -	u64 entry;
> -
> -	/* HSD#22011248461 */
> -	entry = READ_ONCE(*csb);
> -	if (unlikely(entry == -1)) {
> -		preempt_disable();
> -		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> -			GEM_WARN_ON("50us CSB timeout");
> -		preempt_enable();
> -	}
> -	WRITE_ONCE(*(u64 *)csb, -1);
> -
> -	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> -	new_queue =
> -		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> +	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
> +	bool new_queue =
> +		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>  
>  	/*
>  	 * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
>  	 * would require some extra handling, but we don't support that.
>  	 */
>  	if (!ctx_away_valid || new_queue) {
> -		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
> +		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
>  		return true;
>  	}
>  
> @@ -2533,19 +2519,56 @@ static inline bool gen12_csb_parse(const u64 *csb)
>  	 * context switch on an unsuccessful wait instruction since we always
>  	 * use polling mode.
>  	 */
> -	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
> +	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
>  	return false;
>  }
>  
> -static inline bool gen8_csb_parse(const u64 *csb)
> +static inline bool gen8_csb_parse(const u64 csb)
> +{
> +	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +}
> +
> +static noinline u64 wa_csb_read(u64 * const csb)
>  {
> -	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +	u64 entry;
> +
> +	preempt_disable();
> +	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> +		GEM_WARN_ON("50us CSB timeout");
> +	preempt_enable();
> +
> +	return entry;
> +}
> +
> +static inline u64 csb_read(u64 * const csb)
> +{
> +	u64 entry = READ_ONCE(*csb);
> +
> +	/*
> +	 * Unfortunately, the GPU does not always serialise its write
> +	 * of the CSB entries before its write of the CSB pointer, at least
> +	 * from the perspective of the CPU, using what is known as a Global
> +	 * Observation Point. We may read a new CSB tail pointer, but then
> +	 * read the stale CSB entries, causing us to misinterpret the
> +	 * context-switch events, and eventually declare the GPU hung.
> +	 *
> +	 * icl:HSDES#1806554093
> +	 * tgl:HSDES#22011248461
> +	 */
> +	if (unlikely(entry == -1))
> +		entry = wa_csb_read(csb);
> +
> +	/* Consume this entry so that we can spot its future reuse. */
> +	WRITE_ONCE(*csb, -1);
> +
> +	/* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
> +	return entry;
>  }
>  
>  static void process_csb(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	const u64 * const buf = execlists->csb_status;
> +	u64 * const buf = execlists->csb_status;
>  	const u8 num_entries = execlists->csb_size;
>  	u8 head, tail;
>  
> @@ -2603,6 +2626,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  	rmb();
>  	do {
>  		bool promote;
> +		u64 csb;
>  
>  		if (++head == num_entries)
>  			head = 0;
> @@ -2625,15 +2649,14 @@ static void process_csb(struct intel_engine_cs *engine)
>  		 * status notifier.
>  		 */
>  
> +		csb = csb_read(buf + head);
>  		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
> -			     head,
> -			     upper_32_bits(buf[head]),
> -			     lower_32_bits(buf[head]));
> +			     head, upper_32_bits(csb), lower_32_bits(csb));
>  
>  		if (INTEL_GEN(engine->i915) >= 12)
> -			promote = gen12_csb_parse(buf + head);
> +			promote = gen12_csb_parse(csb);
>  		else
> -			promote = gen8_csb_parse(buf + head);
> +			promote = gen8_csb_parse(csb);
>  		if (promote) {
>  			struct i915_request * const *old = execlists->active;
>  
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 4/4] drm/i915/gt: Use a mmio read of the CSB in case of failure
  2020-09-15 12:41 ` [Intel-gfx] [PATCH 4/4] drm/i915/gt: Use a mmio read of the CSB in case of failure Chris Wilson
@ 2020-09-15 13:39   ` Mika Kuoppala
  0 siblings, 0 replies; 18+ messages in thread
From: Mika Kuoppala @ 2020-09-15 13:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> If we find the GPU didn't update the CSB within 50us, we currently fail
> and eventually reset the GPU. Lets report the value from the mmio space
> as a last resort, it may just stave off an unnecessary GPU reset.
>
> Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

I am more of a messenger. This can be replaced by
References: HSDES#22011327657

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c     | 26 +++++++++++++++++++------
>  drivers/gpu/drm/i915/gt/intel_lrc_reg.h |  3 +++
>  2 files changed, 23 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index fcb6ec3d55f4..7cf208311539 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2528,19 +2528,33 @@ static inline bool gen8_csb_parse(const u64 csb)
>  	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
>  }
>  
> -static noinline u64 wa_csb_read(u64 * const csb)
> +static noinline u64
> +wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
>  {
>  	u64 entry;
>  
>  	preempt_disable();
> -	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> -		GEM_WARN_ON("50us CSB timeout");
> +	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)) {

The hsdes says 30us delay between head and csb fetch. But well we
want to sort it out as quickly as possible.

hsdes also states that status buf read delay OR mmio read.

I think that our AND variation surpasses the recommendations.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +		int idx = csb - engine->execlists.csb_status;
> +		int status;
> +
> +		status = GEN8_EXECLISTS_STATUS_BUF;
> +		if (idx >= 6) {
> +			status = GEN11_EXECLISTS_STATUS_BUF2;
> +			idx -= 6;
> +		}
> +		status += sizeof(u64) * idx;
> +
> +		entry = intel_uncore_read64(engine->uncore,
> +					    _MMIO(engine->mmio_base + status));
> +	}
>  	preempt_enable();
>  
>  	return entry;
>  }
>  
> -static inline u64 csb_read(u64 * const csb)
> +static inline u64
> +csb_read(const struct intel_engine_cs *engine, u64 * const csb)
>  {
>  	u64 entry = READ_ONCE(*csb);
>  
> @@ -2556,7 +2570,7 @@ static inline u64 csb_read(u64 * const csb)
>  	 * tgl:HSDES#22011248461
>  	 */
>  	if (unlikely(entry == -1))
> -		entry = wa_csb_read(csb);
> +		entry = wa_csb_read(engine, csb);
>  
>  	/* Consume this entry so that we can spot its future reuse. */
>  	WRITE_ONCE(*csb, -1);
> @@ -2649,7 +2663,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  		 * status notifier.
>  		 */
>  
> -		csb = csb_read(buf + head);
> +		csb = csb_read(engine, buf + head);
>  		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
>  			     head, upper_32_bits(csb), lower_32_bits(csb));
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index 93cb6c460508..1b51f7b9a5c3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -49,4 +49,7 @@
>  #define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x1A
>  #define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0xD
>  
> +#define GEN8_EXECLISTS_STATUS_BUF 0x370
> +#define GEN11_EXECLISTS_STATUS_BUF2 0x3c0
> +
>  #endif /* _INTEL_LRC_REG_H_ */
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (2 preceding siblings ...)
  2020-09-15 12:41 ` [Intel-gfx] [PATCH 4/4] drm/i915/gt: Use a mmio read of the CSB in case of failure Chris Wilson
@ 2020-09-15 13:56 ` Patchwork
  2020-09-15 13:57 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2020-09-15 13:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/81687/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
4ad31c2c30ca drm/i915/gt: Widen CSB pointer to u64 for the parsers
b75249b85d56 drm/i915/gt: Wait for CSB entries on Tigerlake
-:24: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")'
#24: 
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")

-:51: ERROR:ASSIGN_IN_IF: do not use assignment in if condition
#51: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2509:
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))

total: 2 errors, 0 warnings, 0 checks, 33 lines checked
aa0bee81790f drm/i915/gt: Apply the CSB w/a for all
-:12: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")'
#12: 
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")

-:82: ERROR:ASSIGN_IN_IF: do not use assignment in if condition
#82: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2536:
+	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))

total: 2 errors, 0 warnings, 0 checks, 123 lines checked
d7c623ca7daa drm/i915/gt: Use a mmio read of the CSB in case of failure
-:32: ERROR:ASSIGN_IN_IF: do not use assignment in if condition
#32: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2537:
+	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)) {

total: 1 errors, 0 warnings, 0 checks, 60 lines checked


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (3 preceding siblings ...)
  2020-09-15 13:56 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Patchwork
@ 2020-09-15 13:57 ` Patchwork
  2020-09-15 14:21 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-09-15 17:26 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  6 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2020-09-15 13:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/81687/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
+drivers/gpu/drm/i915/gt/intel_reset.c:1311:5: warning: context imbalance in 'intel_gt_reset_trylock' - different lock contexts for basic block
+drivers/gpu/drm/i915/gvt/mmio.c:290:23: warning: memcpy with byte count of 279040
+./include/linux/seqlock.h:752:24: warning: trying to copy expression type 31
+./include/linux/seqlock.h:778:16: warning: trying to copy expression type 31
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen11_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen11_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen11_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen11_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen11_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen11_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen11_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen12_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen12_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen12_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen12_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen12_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen12_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen12_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen6_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen6_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen6_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen6_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen6_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen6_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen6_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen8_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen8_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:409:9: warning: context imbalance in 'gen8_write8' - different lock contexts for basic block


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (4 preceding siblings ...)
  2020-09-15 13:57 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
@ 2020-09-15 14:21 ` Patchwork
  2020-09-15 17:26 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  6 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2020-09-15 14:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 8024 bytes --]

== Series Details ==

Series: series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/81687/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9011 -> Patchwork_18501
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/index.html

Known issues
------------

  Here are the changes found in Patchwork_18501 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload:
    - fi-byt-j1900:       [PASS][1] -> [DMESG-WARN][2] ([i915#1982]) +2 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-byt-j1900/igt@i915_module_load@reload.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-byt-j1900/igt@i915_module_load@reload.html
    - fi-tgl-y:           [PASS][3] -> [DMESG-WARN][4] ([i915#1982] / [k.org#205379])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-tgl-y/igt@i915_module_load@reload.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-tgl-y/igt@i915_module_load@reload.html

  * igt@i915_pm_rpm@module-reload:
    - fi-bsw-n3050:       [PASS][5] -> [DMESG-WARN][6] ([i915#1982]) +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-bsw-n3050/igt@i915_pm_rpm@module-reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-bsw-n3050/igt@i915_pm_rpm@module-reload.html

  * igt@kms_busy@basic@flip:
    - fi-tgl-y:           [PASS][7] -> [DMESG-WARN][8] ([i915#1982]) +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-tgl-y/igt@kms_busy@basic@flip.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-tgl-y/igt@kms_busy@basic@flip.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-bsw-kefka:       [PASS][9] -> [DMESG-WARN][10] ([i915#1982]) +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - fi-icl-u2:          [PASS][11] -> [DMESG-WARN][12] ([i915#1982])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-icl-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-icl-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  * igt@prime_vgem@basic-fence-flip:
    - fi-tgl-y:           [PASS][13] -> [DMESG-WARN][14] ([i915#402]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-tgl-y/igt@prime_vgem@basic-fence-flip.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-tgl-y/igt@prime_vgem@basic-fence-flip.html

  * igt@vgem_basic@unload:
    - fi-kbl-x1275:       [PASS][15] -> [DMESG-WARN][16] ([i915#62] / [i915#92] / [i915#95])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-kbl-x1275/igt@vgem_basic@unload.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-kbl-x1275/igt@vgem_basic@unload.html

  
#### Possible fixes ####

  * igt@gem_exec_store@basic:
    - fi-bsw-nick:        [INCOMPLETE][17] -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-bsw-nick/igt@gem_exec_store@basic.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-bsw-nick/igt@gem_exec_store@basic.html

  * igt@gem_mmap_gtt@basic:
    - fi-tgl-y:           [DMESG-WARN][19] ([i915#402]) -> [PASS][20] +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-tgl-y/igt@gem_mmap_gtt@basic.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-tgl-y/igt@gem_mmap_gtt@basic.html

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-byt-j1900:       [DMESG-WARN][21] ([i915#1982]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-byt-j1900/igt@i915_pm_rpm@basic-pci-d3-state.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-byt-j1900/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_pm_rpm@module-reload:
    - fi-bxt-dsi:         [DMESG-WARN][23] ([i915#1635] / [i915#1982]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-bxt-dsi/igt@i915_pm_rpm@module-reload.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-bxt-dsi/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live@gem_contexts:
    - fi-tgl-u2:          [INCOMPLETE][25] ([i915#2045]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-tgl-u2/igt@i915_selftest@live@gem_contexts.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-tgl-u2/igt@i915_selftest@live@gem_contexts.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-tgl-y:           [DMESG-WARN][27] ([i915#1982]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-tgl-y/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-tgl-y/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
    - fi-bsw-n3050:       [DMESG-WARN][29] ([i915#1982]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-bsw-n3050/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  
#### Warnings ####

  * igt@kms_flip@basic-flip-vs-wf_vblank@a-dp1:
    - fi-kbl-x1275:       [DMESG-WARN][31] ([i915#62] / [i915#92]) -> [DMESG-WARN][32] ([i915#62] / [i915#92] / [i915#95]) +3 similar issues
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-wf_vblank@a-dp1.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-wf_vblank@a-dp1.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2045]: https://gitlab.freedesktop.org/drm/intel/issues/2045
  [i915#289]: https://gitlab.freedesktop.org/drm/intel/issues/289
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95
  [k.org#205379]: https://bugzilla.kernel.org/show_bug.cgi?id=205379


Participating hosts (46 -> 40)
------------------------------

  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper 


Build changes
-------------

  * Linux: CI_DRM_9011 -> Patchwork_18501

  CI-20190529: 20190529
  CI_DRM_9011: 468ac55d488f3394ad6897883e409b287619a335 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5784: 265f1d4a5a14998b98f963ee04d695cf3f3c2d3a @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18501: d7c623ca7daaf5909ac54da3444b18d6f50be473 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

d7c623ca7daa drm/i915/gt: Use a mmio read of the CSB in case of failure
aa0bee81790f drm/i915/gt: Apply the CSB w/a for all
b75249b85d56 drm/i915/gt: Wait for CSB entries on Tigerlake
4ad31c2c30ca drm/i915/gt: Widen CSB pointer to u64 for the parsers

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/index.html

[-- Attachment #1.2: Type: text/html, Size: 9959 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (5 preceding siblings ...)
  2020-09-15 14:21 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-09-15 17:26 ` Patchwork
  6 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2020-09-15 17:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 15278 bytes --]

== Series Details ==

Series: series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/81687/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9011_full -> Patchwork_18501_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18501_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18501_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18501_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_persistence@processes:
    - shard-iclb:         [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-iclb7/igt@gem_ctx_persistence@processes.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-iclb3/igt@gem_ctx_persistence@processes.html

  
Known issues
------------

  Here are the changes found in Patchwork_18501_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@kms:
    - shard-snb:          [PASS][3] -> [DMESG-WARN][4] ([i915#1982])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-snb7/igt@gem_eio@kms.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-snb4/igt@gem_eio@kms.html

  * igt@gem_exec_whisper@basic-queues-priority-all:
    - shard-glk:          [PASS][5] -> [DMESG-WARN][6] ([i915#118] / [i915#95])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-glk7/igt@gem_exec_whisper@basic-queues-priority-all.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-glk4/igt@gem_exec_whisper@basic-queues-priority-all.html

  * igt@gem_userptr_blits@sync-unmap-cycles:
    - shard-skl:          [PASS][7] -> [TIMEOUT][8] ([i915#1958] / [i915#2424])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl3/igt@gem_userptr_blits@sync-unmap-cycles.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl4/igt@gem_userptr_blits@sync-unmap-cycles.html

  * igt@kms_big_fb@y-tiled-64bpp-rotate-180:
    - shard-kbl:          [PASS][9] -> [DMESG-WARN][10] ([i915#1982])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-kbl1/igt@kms_big_fb@y-tiled-64bpp-rotate-180.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-kbl6/igt@kms_big_fb@y-tiled-64bpp-rotate-180.html

  * igt@kms_cursor_legacy@cursor-vs-flip-varying-size:
    - shard-hsw:          [PASS][11] -> [FAIL][12] ([i915#2370])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-hsw8/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-hsw6/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html

  * igt@kms_draw_crc@draw-method-xrgb2101010-mmap-cpu-ytiled:
    - shard-tglb:         [PASS][13] -> [DMESG-WARN][14] ([i915#1982]) +2 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-tglb7/igt@kms_draw_crc@draw-method-xrgb2101010-mmap-cpu-ytiled.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-tglb8/igt@kms_draw_crc@draw-method-xrgb2101010-mmap-cpu-ytiled.html

  * igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled:
    - shard-glk:          [PASS][15] -> [DMESG-WARN][16] ([i915#1982])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-glk8/igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-glk4/igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled.html

  * igt@kms_draw_crc@draw-method-xrgb8888-blt-xtiled:
    - shard-skl:          [PASS][17] -> [FAIL][18] ([i915#52] / [i915#54])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl10/igt@kms_draw_crc@draw-method-xrgb8888-blt-xtiled.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl5/igt@kms_draw_crc@draw-method-xrgb8888-blt-xtiled.html

  * igt@kms_flip@flip-vs-blocking-wf-vblank@a-edp1:
    - shard-skl:          [PASS][19] -> [DMESG-WARN][20] ([i915#1982]) +5 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl8/igt@kms_flip@flip-vs-blocking-wf-vblank@a-edp1.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl2/igt@kms_flip@flip-vs-blocking-wf-vblank@a-edp1.html

  * igt@kms_flip@flip-vs-suspend@c-dp1:
    - shard-kbl:          [PASS][21] -> [DMESG-WARN][22] ([i915#180]) +1 similar issue
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-kbl4/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-kbl3/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * igt@kms_flip@flip-vs-suspend@c-edp1:
    - shard-skl:          [PASS][23] -> [INCOMPLETE][24] ([i915#198])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl3/igt@kms_flip@flip-vs-suspend@c-edp1.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl7/igt@kms_flip@flip-vs-suspend@c-edp1.html

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          [PASS][25] -> [FAIL][26] ([fdo#108145] / [i915#265])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl10/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [PASS][27] -> [SKIP][28] ([fdo#109642] / [fdo#111068])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-iclb2/igt@kms_psr2_su@page_flip.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-iclb7/igt@kms_psr2_su@page_flip.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][29] -> [SKIP][30] ([fdo#109441]) +3 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-iclb7/igt@kms_psr@psr2_cursor_render.html

  * igt@kms_setmode@basic:
    - shard-skl:          [PASS][31] -> [FAIL][32] ([i915#31])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl5/igt@kms_setmode@basic.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl7/igt@kms_setmode@basic.html

  
#### Possible fixes ####

  * igt@gem_exec_reloc@basic-many-active@vcs0:
    - shard-glk:          [FAIL][33] ([i915#2389]) -> [PASS][34] +1 similar issue
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-glk5/igt@gem_exec_reloc@basic-many-active@vcs0.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-glk1/igt@gem_exec_reloc@basic-many-active@vcs0.html

  * igt@gem_exec_whisper@basic-contexts-priority:
    - shard-glk:          [DMESG-WARN][35] ([i915#118] / [i915#95]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-glk1/igt@gem_exec_whisper@basic-contexts-priority.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-glk7/igt@gem_exec_whisper@basic-contexts-priority.html

  * igt@gem_huc_copy@huc-copy:
    - shard-tglb:         [SKIP][37] ([i915#2190]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-tglb6/igt@gem_huc_copy@huc-copy.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-tglb7/igt@gem_huc_copy@huc-copy.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-skl:          [INCOMPLETE][39] ([i915#198]) -> [PASS][40] +1 similar issue
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl3/igt@gem_workarounds@suspend-resume-context.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl2/igt@gem_workarounds@suspend-resume-context.html

  * igt@i915_pm_sseu@full-enable:
    - shard-glk:          [FAIL][41] -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-glk5/igt@i915_pm_sseu@full-enable.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-glk2/igt@i915_pm_sseu@full-enable.html

  * igt@i915_selftest@mock@contexts:
    - shard-skl:          [INCOMPLETE][43] ([i915#198] / [i915#2278]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl4/igt@i915_selftest@mock@contexts.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl8/igt@i915_selftest@mock@contexts.html
    - shard-apl:          [INCOMPLETE][45] ([i915#1635] / [i915#2278]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-apl4/igt@i915_selftest@mock@contexts.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-apl4/igt@i915_selftest@mock@contexts.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-kbl:          [DMESG-WARN][47] ([i915#180]) -> [PASS][48] +3 similar issues
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-kbl3/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-kbl2/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_cursor_edge_walk@pipe-c-256x256-top-edge:
    - shard-glk:          [DMESG-WARN][49] ([i915#1982]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-glk5/igt@kms_cursor_edge_walk@pipe-c-256x256-top-edge.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-glk2/igt@kms_cursor_edge_walk@pipe-c-256x256-top-edge.html

  * igt@kms_flip@2x-wf_vblank-ts-check-interruptible@ab-hdmi-a1-hdmi-a2:
    - shard-glk:          [FAIL][51] ([i915#2122]) -> [PASS][52]
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-glk1/igt@kms_flip@2x-wf_vblank-ts-check-interruptible@ab-hdmi-a1-hdmi-a2.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-glk7/igt@kms_flip@2x-wf_vblank-ts-check-interruptible@ab-hdmi-a1-hdmi-a2.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1:
    - shard-skl:          [FAIL][53] ([i915#2122]) -> [PASS][54]
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl5/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl9/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html

  * igt@kms_flip@plain-flip-ts-check-interruptible@a-dp1:
    - shard-kbl:          [DMESG-WARN][55] ([i915#1982]) -> [PASS][56]
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-kbl3/igt@kms_flip@plain-flip-ts-check-interruptible@a-dp1.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-kbl4/igt@kms_flip@plain-flip-ts-check-interruptible@a-dp1.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-skl:          [FAIL][57] ([i915#1188]) -> [PASS][58] +1 similar issue
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl9/igt@kms_hdr@bpc-switch-suspend.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl1/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [SKIP][59] ([fdo#109441]) -> [PASS][60] +2 similar issues
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-iclb4/igt@kms_psr@psr2_sprite_plane_move.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@perf@oa-exponents:
    - shard-skl:          [DMESG-WARN][61] ([i915#1982]) -> [PASS][62] +5 similar issues
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl7/igt@perf@oa-exponents.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl3/igt@perf@oa-exponents.html

  * igt@perf@polling:
    - shard-skl:          [FAIL][63] ([i915#1542]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-skl4/igt@perf@polling.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-skl8/igt@perf@polling.html

  
#### Warnings ####

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [DMESG-WARN][65] ([i915#1226]) -> [SKIP][66] ([fdo#109349])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9011/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/shard-iclb7/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1226]: https://gitlab.freedesktop.org/drm/intel/issues/1226
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1958]: https://gitlab.freedesktop.org/drm/intel/issues/1958
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2278]: https://gitlab.freedesktop.org/drm/intel/issues/2278
  [i915#2370]: https://gitlab.freedesktop.org/drm/intel/issues/2370
  [i915#2389]: https://gitlab.freedesktop.org/drm/intel/issues/2389
  [i915#2424]: https://gitlab.freedesktop.org/drm/intel/issues/2424
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_9011 -> Patchwork_18501

  CI-20190529: 20190529
  CI_DRM_9011: 468ac55d488f3394ad6897883e409b287619a335 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5784: 265f1d4a5a14998b98f963ee04d695cf3f3c2d3a @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18501: d7c623ca7daaf5909ac54da3444b18d6f50be473 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18501/index.html

[-- Attachment #1.2: Type: text/html, Size: 17685 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-09-15 12:41   ` [Intel-gfx] " Chris Wilson
@ 2020-09-16  6:33     ` Greg KH
  -1 siblings, 0 replies; 18+ messages in thread
From: Greg KH @ 2020-09-16  6:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Bruce Chang, Mika Kuoppala, stable

On Tue, Sep 15, 2020 at 01:41:48PM +0100, Chris Wilson wrote:
> On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> Forcibly evict stale csb entries") where, presumably, due to a missing
> Global Observation Point synchronisation, the write pointer of the CSB
> ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> we see the GPU report more context-switch entries for us to parse, but
> those entries have not been written, leading us to process stale events,
> and eventually report a hung GPU.
> 
> However, this effect appears to be much more severe than we previously
> saw on Icelake (though it might be best if we try the same approach
> there as well and measure), and Bruce suggested the good idea of resetting
> the CSB entry after use so that we can detect when it has been updated by
> the GPU. By instrumenting how long that may be, we can set a reliable
> upper bound for how long we should wait for:
> 
>     513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")

What does "References:" mean?  Should that be "Fixes:"?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-09-16  6:33     ` Greg KH
  0 siblings, 0 replies; 18+ messages in thread
From: Greg KH @ 2020-09-16  6:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, stable

On Tue, Sep 15, 2020 at 01:41:48PM +0100, Chris Wilson wrote:
> On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> Forcibly evict stale csb entries") where, presumably, due to a missing
> Global Observation Point synchronisation, the write pointer of the CSB
> ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> we see the GPU report more context-switch entries for us to parse, but
> those entries have not been written, leading us to process stale events,
> and eventually report a hung GPU.
> 
> However, this effect appears to be much more severe than we previously
> saw on Icelake (though it might be best if we try the same approach
> there as well and measure), and Bruce suggested the good idea of resetting
> the CSB entry after use so that we can detect when it has been updated by
> the GPU. By instrumenting how long that may be, we can set a reliable
> upper bound for how long we should wait for:
> 
>     513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")

What does "References:" mean?  Should that be "Fixes:"?

thanks,

greg k-h
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-09-16  6:33     ` [Intel-gfx] " Greg KH
@ 2020-09-16  8:26       ` Chris Wilson
  -1 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2020-09-16  8:26 UTC (permalink / raw)
  To: Greg KH; +Cc: intel-gfx, stable

Quoting Greg KH (2020-09-16 07:33:58)
> On Tue, Sep 15, 2020 at 01:41:48PM +0100, Chris Wilson wrote:
> > On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> > Forcibly evict stale csb entries") where, presumably, due to a missing
> > Global Observation Point synchronisation, the write pointer of the CSB
> > ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> > we see the GPU report more context-switch entries for us to parse, but
> > those entries have not been written, leading us to process stale events,
> > and eventually report a hung GPU.
> > 
> > However, this effect appears to be much more severe than we previously
> > saw on Icelake (though it might be best if we try the same approach
> > there as well and measure), and Bruce suggested the good idea of resetting
> > the CSB entry after use so that we can detect when it has been updated by
> > the GPU. By instrumenting how long that may be, we can set a reliable
> > upper bound for how long we should wait for:
> > 
> >     513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> > 
> > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> > References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> 
> What does "References:" mean?  Should that be "Fixes:"?

It's a reference to an earlier w/a for a previous generation for the
same symptoms. This patch should supplement that w/a.
-Chris

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-09-16  8:26       ` Chris Wilson
  0 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2020-09-16  8:26 UTC (permalink / raw)
  To: Greg KH; +Cc: intel-gfx, stable

Quoting Greg KH (2020-09-16 07:33:58)
> On Tue, Sep 15, 2020 at 01:41:48PM +0100, Chris Wilson wrote:
> > On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> > Forcibly evict stale csb entries") where, presumably, due to a missing
> > Global Observation Point synchronisation, the write pointer of the CSB
> > ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> > we see the GPU report more context-switch entries for us to parse, but
> > those entries have not been written, leading us to process stale events,
> > and eventually report a hung GPU.
> > 
> > However, this effect appears to be much more severe than we previously
> > saw on Icelake (though it might be best if we try the same approach
> > there as well and measure), and Bruce suggested the good idea of resetting
> > the CSB entry after use so that we can detect when it has been updated by
> > the GPU. By instrumenting how long that may be, we can set a reliable
> > upper bound for how long we should wait for:
> > 
> >     513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> > 
> > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> > References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> 
> What does "References:" mean?  Should that be "Fixes:"?

It's a reference to an earlier w/a for a previous generation for the
same symptoms. This patch should supplement that w/a.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-09-16  8:26       ` Chris Wilson
@ 2020-09-16  8:35         ` Greg KH
  -1 siblings, 0 replies; 18+ messages in thread
From: Greg KH @ 2020-09-16  8:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, stable

On Wed, Sep 16, 2020 at 09:26:58AM +0100, Chris Wilson wrote:
> Quoting Greg KH (2020-09-16 07:33:58)
> > On Tue, Sep 15, 2020 at 01:41:48PM +0100, Chris Wilson wrote:
> > > On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> > > Forcibly evict stale csb entries") where, presumably, due to a missing
> > > Global Observation Point synchronisation, the write pointer of the CSB
> > > ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> > > we see the GPU report more context-switch entries for us to parse, but
> > > those entries have not been written, leading us to process stale events,
> > > and eventually report a hung GPU.
> > > 
> > > However, this effect appears to be much more severe than we previously
> > > saw on Icelake (though it might be best if we try the same approach
> > > there as well and measure), and Bruce suggested the good idea of resetting
> > > the CSB entry after use so that we can detect when it has been updated by
> > > the GPU. By instrumenting how long that may be, we can set a reliable
> > > upper bound for how long we should wait for:
> > > 
> > >     513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> > > 
> > > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> > > References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> > 
> > What does "References:" mean?  Should that be "Fixes:"?
> 
> It's a reference to an earlier w/a for a previous generation for the
> same symptoms. This patch should supplement that w/a.

I see no such "reference" to that tag in
Documentation/process/submitting-patches.rst, so how were we supposed to
know this?  :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-09-16  8:35         ` Greg KH
  0 siblings, 0 replies; 18+ messages in thread
From: Greg KH @ 2020-09-16  8:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, stable

On Wed, Sep 16, 2020 at 09:26:58AM +0100, Chris Wilson wrote:
> Quoting Greg KH (2020-09-16 07:33:58)
> > On Tue, Sep 15, 2020 at 01:41:48PM +0100, Chris Wilson wrote:
> > > On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> > > Forcibly evict stale csb entries") where, presumably, due to a missing
> > > Global Observation Point synchronisation, the write pointer of the CSB
> > > ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> > > we see the GPU report more context-switch entries for us to parse, but
> > > those entries have not been written, leading us to process stale events,
> > > and eventually report a hung GPU.
> > > 
> > > However, this effect appears to be much more severe than we previously
> > > saw on Icelake (though it might be best if we try the same approach
> > > there as well and measure), and Bruce suggested the good idea of resetting
> > > the CSB entry after use so that we can detect when it has been updated by
> > > the GPU. By instrumenting how long that may be, we can set a reliable
> > > upper bound for how long we should wait for:
> > > 
> > >     513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> > > 
> > > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> > > References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> > 
> > What does "References:" mean?  Should that be "Fixes:"?
> 
> It's a reference to an earlier w/a for a previous generation for the
> same symptoms. This patch should supplement that w/a.

I see no such "reference" to that tag in
Documentation/process/submitting-patches.rst, so how were we supposed to
know this?  :)

thanks,

greg k-h
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2020-09-16  8:34 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-15 12:41 [Intel-gfx] [PATCH 1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
2020-09-15 12:41 ` [PATCH 2/4] drm/i915/gt: Wait for CSB entries on Tigerlake Chris Wilson
2020-09-15 12:41   ` [Intel-gfx] " Chris Wilson
2020-09-15 13:19   ` Mika Kuoppala
2020-09-16  6:33   ` Greg KH
2020-09-16  6:33     ` [Intel-gfx] " Greg KH
2020-09-16  8:26     ` Chris Wilson
2020-09-16  8:26       ` Chris Wilson
2020-09-16  8:35       ` Greg KH
2020-09-16  8:35         ` Greg KH
2020-09-15 12:41 ` [Intel-gfx] [PATCH 3/4] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
2020-09-15 13:29   ` Mika Kuoppala
2020-09-15 12:41 ` [Intel-gfx] [PATCH 4/4] drm/i915/gt: Use a mmio read of the CSB in case of failure Chris Wilson
2020-09-15 13:39   ` Mika Kuoppala
2020-09-15 13:56 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/4] drm/i915/gt: Widen CSB pointer to u64 for the parsers Patchwork
2020-09-15 13:57 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-09-15 14:21 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-09-15 17:26 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.