All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
@ 2020-08-14 15:57 Chris Wilson
  2020-08-14 15:57   ` [Intel-gfx] " Chris Wilson
                   ` (6 more replies)
  0 siblings, 7 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 15:57 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Each CSB entry is 64b, and it is simpler for us to treat the CSB as an
array of 64b entries than as an array of pairs of 32b entries.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 33 ++++++++++----------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c400aaa2287b..ee6312601c56 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -278,7 +278,7 @@ struct intel_engine_execlists {
 	 *
 	 * Note these register may be either mmio or HWSP shadow.
 	 */
-	u32 *csb_status;
+	u64 *csb_status;
 
 	/**
 	 * @csb_size: context status buffer FIFO size
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 82742c6f423c..db982fc0f0bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2464,7 +2464,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
 }
 
 static inline void
-invalidate_csb_entries(const u32 *first, const u32 *last)
+invalidate_csb_entries(const u64 *first, const u64 *last)
 {
 	clflush((void *)first);
 	clflush((void *)last);
@@ -2496,14 +2496,12 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
  *     bits 47-57: sw context id of the lrc the GT switched away from
  *     bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool
-gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u32 lower_dw = csb[0];
-	u32 upper_dw = csb[1];
-	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
-	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	u64 entry = READ_ONCE(*csb);
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	bool new_queue =
+		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2513,7 +2511,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * would require some extra handling, but we don't support that.
 	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!ctx_to_valid);
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
 		return true;
 	}
 
@@ -2522,12 +2520,11 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 	 * context switch on an unsuccessful wait instruction since we always
 	 * use polling mode.
 	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
 	return false;
 }
 
-static inline bool
-gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+static inline bool gen8_csb_parse(const u64 *csb)
 {
 	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
 }
@@ -2535,7 +2532,7 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u32 * const buf = execlists->csb_status;
+	const u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2616,12 +2613,14 @@ static void process_csb(struct intel_engine_cs *engine)
 		 */
 
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head, buf[2 * head + 0], buf[2 * head + 1]);
+			     head,
+			     upper_32_bits(buf[head]),
+			     lower_32_bits(buf[head]));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(execlists, buf + 2 * head);
+			promote = gen12_csb_parse(buf + head);
 		else
-			promote = gen8_csb_parse(execlists, buf + 2 * head);
+			promote = gen8_csb_parse(buf + head);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
@@ -5148,7 +5147,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	}
 
 	execlists->csb_status =
-		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
+		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
@ 2020-08-14 15:57   ` Chris Wilson
  2020-08-14 15:57 ` [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 15:57 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Bruce Chang, Mika Kuoppala, stable

On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is,
we see the GPU report more context-switch entries for us to parse, but
those entries have not yet been written, leading us to process stale
events, and eventually report a hung GPU.

However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that update may take to arrive, we can
set a reliable upper bound on how long we should wait:

    513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: stable@vger.kernel.org # v5.4
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index db982fc0f0bc..3b8161c6b601 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  */
 static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u64 entry = READ_ONCE(*csb);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	bool new_queue =
+	bool ctx_away_valid;
+	bool new_queue;
+	u64 entry;
+
+	/* XXX HSD */
+	entry = READ_ONCE(*csb);
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+	WRITE_ONCE(*(u64 *)csb, -1);
+
+	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	new_queue =
 		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
@@ -3995,6 +4008,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	WRITE_ONCE(*execlists->csb_write, reset_value);
 	wmb(); /* Make sure this is visible to HW (paranoia?) */
 
+	/* Check that the GPU does indeed update the CSB entries! */
+	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
 	invalidate_csb_entries(&execlists->csb_status[0],
 			       &execlists->csb_status[reset_value]);
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-08-14 15:57   ` Chris Wilson
  0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 15:57 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable, Chris Wilson

On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
Forcibly evict stale csb entries") where, presumably, due to a missing
Global Observation Point synchronisation, the write pointer of the CSB
ringbuffer is updated _prior_ to the contents of the ringbuffer. That is,
we see the GPU report more context-switch entries for us to parse, but
those entries have not yet been written, leading us to process stale
events, and eventually report a hung GPU.

However, this effect appears to be much more severe than we previously
saw on Icelake (though it might be best if we try the same approach
there as well and measure), and Bruce suggested the good idea of resetting
the CSB entry after use so that we can detect when it has been updated by
the GPU. By instrumenting how long that update may take to arrive, we can
set a reliable upper bound on how long we should wait:

    513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: stable@vger.kernel.org # v5.4
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index db982fc0f0bc..3b8161c6b601 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  */
 static inline bool gen12_csb_parse(const u64 *csb)
 {
-	u64 entry = READ_ONCE(*csb);
-	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	bool new_queue =
+	bool ctx_away_valid;
+	bool new_queue;
+	u64 entry;
+
+	/* XXX HSD */
+	entry = READ_ONCE(*csb);
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+	WRITE_ONCE(*(u64 *)csb, -1);
+
+	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
+	new_queue =
 		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
@@ -3995,6 +4008,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	WRITE_ONCE(*execlists->csb_write, reset_value);
 	wmb(); /* Make sure this is visible to HW (paranoia?) */
 
+	/* Check that the GPU does indeed update the CSB entries! */
+	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
 	invalidate_csb_entries(&execlists->csb_status[0],
 			       &execlists->csb_status[reset_value]);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
  2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
  2020-08-14 15:57   ` [Intel-gfx] " Chris Wilson
@ 2020-08-14 15:57 ` Chris Wilson
  2020-08-14 18:18   ` Chang, Bruce
  2020-08-14 18:41   ` Mika Kuoppala
  2020-08-14 16:15 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Patchwork
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 15:57 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Since we expect to inline the csb_parse() routines, the w/a for the
stale CSB data on Tigerlake will be pulled into process_csb(). We might
as well simply reuse the logic for all platforms, which will hopefully
also avoid any strange behaviour on Icelake that was not covered by our
previous w/a.

References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 70 +++++++++++++++++------------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3b8161c6b601..c176a029f27b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  *     bits 47-57: sw context id of the lrc the GT switched away from
  *     bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
 {
-	bool ctx_away_valid;
-	bool new_queue;
-	u64 entry;
-
-	/* XXX HSD */
-	entry = READ_ONCE(*csb);
-	if (unlikely(entry == -1)) {
-		preempt_disable();
-		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-			GEM_WARN_ON("50us CSB timeout");
-		preempt_enable();
-	}
-	WRITE_ONCE(*(u64 *)csb, -1);
-
-	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	new_queue =
-		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+	bool new_queue =
+		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
 	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
 	 * would require some extra handling, but we don't support that.
 	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
 		return true;
 	}
 
@@ -2533,19 +2519,47 @@ static inline bool gen12_csb_parse(const u64 *csb)
 	 * context switch on an unsuccessful wait instruction since we always
 	 * use polling mode.
 	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
 	return false;
 }
 
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
+{
+	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static inline u64 csb_read(u64 * const csb)
 {
-	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+	u64 entry = READ_ONCE(*csb);
+
+	/*
+	 * Unfortunately, the GPU does not always serialise its write
+	 * of the CSB entries before its write of the CSB pointer, at least
+	 * from the perspective of the CPU, using what is known as a Global
+	 * Observation Point. We may read a new CSB tail pointer, but then
+	 * read the stale CSB entries, causing us to misinterpret the
+	 * context-switch events, and eventually declare the GPU hung.
+	 *
+	 * icl:HSDES#:1806554093
+	 * tgl:XXX?
+	 */
+	if (unlikely(entry == -1)) {
+		preempt_disable();
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+			GEM_WARN_ON("50us CSB timeout");
+		preempt_enable();
+	}
+
+	/* Consume this entry so that we can spot its future reuse. */
+	WRITE_ONCE(*csb, -1);
+
+	return entry;
 }
 
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u64 * const buf = execlists->csb_status;
+	u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2603,6 +2617,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 	do {
 		bool promote;
+		u64 csb;
 
 		if (++head == num_entries)
 			head = 0;
@@ -2625,15 +2640,14 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
+		csb = csb_read(buf + head);
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head,
-			     upper_32_bits(buf[head]),
-			     lower_32_bits(buf[head]));
+			     head, upper_32_bits(csb), lower_32_bits(csb));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(buf + head);
+			promote = gen12_csb_parse(csb);
 		else
-			promote = gen8_csb_parse(buf + head);
+			promote = gen8_csb_parse(csb);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
  2020-08-14 15:57   ` [Intel-gfx] " Chris Wilson
  2020-08-14 15:57 ` [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
@ 2020-08-14 16:15 ` Patchwork
  2020-08-14 16:16 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2020-08-14 16:15 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/80627/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
1c6a740de44a drm/i915/gt: Widen CSB pointer to u64 for the parsers
f1c36ee09bda drm/i915/gt: Wait for CSB entries on Tigerlake
-:24: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")'
#24: 
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")

-:50: ERROR:ASSIGN_IN_IF: do not use assignment in if condition
#50: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2509:
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))

total: 2 errors, 0 warnings, 0 checks, 33 lines checked
0e3d8f6ca520 drm/i915/gt: Apply the CSB w/a for all
-:12: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")'
#12: 
References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")

-:93: ERROR:ASSIGN_IN_IF: do not use assignment in if condition
#93: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:2548:
+		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))

total: 2 errors, 0 warnings, 0 checks, 114 lines checked


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (2 preceding siblings ...)
  2020-08-14 16:15 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Patchwork
@ 2020-08-14 16:16 ` Patchwork
  2020-08-14 16:34 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2020-08-14 16:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/80627/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
-
+drivers/gpu/drm/i915/gt/intel_reset.c:1311:5: warning: context imbalance in 'intel_gt_reset_trylock' - different lock contexts for basic block
+drivers/gpu/drm/i915/gvt/mmio.c:287:23: warning: memcpy with byte count of 279040
+drivers/gpu/drm/i915/i915_perf.c:1425:15: warning: memset with byte count of 16777216
+drivers/gpu/drm/i915/i915_perf.c:1479:15: warning: memset with byte count of 16777216
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen11_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen12_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen6_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen8_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen8_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:408:9: warning: context imbalance in 'gen8_write8' - different lock contexts for basic block


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (3 preceding siblings ...)
  2020-08-14 16:16 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
@ 2020-08-14 16:34 ` Patchwork
  2020-08-14 18:07 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  2020-08-14 18:29 ` [Intel-gfx] [PATCH 1/3] " Mika Kuoppala
  6 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2020-08-14 16:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 5880 bytes --]

== Series Details ==

Series: series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/80627/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8882 -> Patchwork_18358
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/index.html

Known issues
------------

  Here are the changes found in Patchwork_18358 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_module_load@reload:
    - fi-bxt-dsi:         [PASS][1] -> [DMESG-WARN][2] ([i915#1635] / [i915#1982])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-bxt-dsi/igt@i915_module_load@reload.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-bxt-dsi/igt@i915_module_load@reload.html

  * igt@kms_busy@basic@flip:
    - fi-kbl-x1275:       [PASS][3] -> [DMESG-WARN][4] ([i915#62] / [i915#92] / [i915#95])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-kbl-x1275/igt@kms_busy@basic@flip.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-kbl-x1275/igt@kms_busy@basic@flip.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-cml-u2:          [PASS][5] -> [DMESG-WARN][6] ([i915#1982])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-cml-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-cml-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
    - fi-bsw-kefka:       [PASS][7] -> [DMESG-WARN][8] ([i915#1982])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s0:
    - fi-tgl-u2:          [FAIL][9] ([i915#1888]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-tgl-u2/igt@gem_exec_suspend@basic-s0.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-tgl-u2/igt@gem_exec_suspend@basic-s0.html

  * igt@i915_selftest@live@gem_contexts:
    - fi-tgl-u2:          [INCOMPLETE][11] ([i915#2045]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-tgl-u2/igt@i915_selftest@live@gem_contexts.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-tgl-u2/igt@i915_selftest@live@gem_contexts.html

  * igt@kms_busy@basic@flip:
    - {fi-tgl-dsi}:       [DMESG-WARN][13] ([i915#1982]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-tgl-dsi/igt@kms_busy@basic@flip.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-tgl-dsi/igt@kms_busy@basic@flip.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-kbl-r:           [DMESG-WARN][15] ([i915#1982]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-kbl-r/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-kbl-r/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  
#### Warnings ####

  * igt@gem_exec_suspend@basic-s0:
    - fi-kbl-x1275:       [DMESG-WARN][17] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][18] ([i915#62] / [i915#92]) +3 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-kbl-x1275/igt@gem_exec_suspend@basic-s0.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-kbl-x1275/igt@gem_exec_suspend@basic-s0.html

  * igt@kms_flip@basic-flip-vs-modeset@a-dp1:
    - fi-kbl-x1275:       [DMESG-WARN][19] ([i915#62] / [i915#92]) -> [DMESG-WARN][20] ([i915#62] / [i915#92] / [i915#95]) +4 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-modeset@a-dp1.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-modeset@a-dp1.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2045]: https://gitlab.freedesktop.org/drm/intel/issues/2045
  [i915#2100]: https://gitlab.freedesktop.org/drm/intel/issues/2100
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (44 -> 36)
------------------------------

  Missing    (8): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_8882 -> Patchwork_18358

  CI-20190529: 20190529
  CI_DRM_8882: bc285974fbc945765c176218aba7a003b687eea9 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5769: 4e5f76be680b65780204668e302026cf638decc9 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18358: 0e3d8f6ca520e6f9bef3fd038e99dd8e17581de3 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

0e3d8f6ca520 drm/i915/gt: Apply the CSB w/a for all
f1c36ee09bda drm/i915/gt: Wait for CSB entries on Tigerlake
1c6a740de44a drm/i915/gt: Widen CSB pointer to u64 for the parsers

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/index.html

[-- Attachment #1.2: Type: text/html, Size: 7585 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (4 preceding siblings ...)
  2020-08-14 16:34 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-08-14 18:07 ` Patchwork
  2020-08-14 18:29 ` [Intel-gfx] [PATCH 1/3] " Mika Kuoppala
  6 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2020-08-14 18:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 15649 bytes --]

== Series Details ==

Series: series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
URL   : https://patchwork.freedesktop.org/series/80627/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8882_full -> Patchwork_18358_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18358_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18358_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18358_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-skl:          [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl3/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
Known issues
------------

  Here are the changes found in Patchwork_18358_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_whisper@basic-forked:
    - shard-glk:          [PASS][3] -> [DMESG-WARN][4] ([i915#118] / [i915#95])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-glk8/igt@gem_exec_whisper@basic-forked.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-glk4/igt@gem_exec_whisper@basic-forked.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [PASS][5] -> [FAIL][6] ([i915#454])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb3/igt@i915_pm_dc@dc6-psr.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb2/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_selftest@mock@requests:
    - shard-apl:          [PASS][7] -> [INCOMPLETE][8] ([i915#1635] / [i915#2278])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-apl4/igt@i915_selftest@mock@requests.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-apl8/igt@i915_selftest@mock@requests.html

  * igt@kms_big_fb@y-tiled-64bpp-rotate-180:
    - shard-glk:          [PASS][9] -> [DMESG-FAIL][10] ([i915#118] / [i915#95])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-glk6/igt@kms_big_fb@y-tiled-64bpp-rotate-180.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-glk8/igt@kms_big_fb@y-tiled-64bpp-rotate-180.html

  * igt@kms_color@pipe-b-ctm-negative:
    - shard-skl:          [PASS][11] -> [DMESG-WARN][12] ([i915#1982]) +13 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl1/igt@kms_color@pipe-b-ctm-negative.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl9/igt@kms_color@pipe-b-ctm-negative.html

  * igt@kms_flip@2x-modeset-vs-vblank-race-interruptible@ab-vga1-hdmi-a1:
    - shard-hsw:          [PASS][13] -> [DMESG-WARN][14] ([i915#1982]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-hsw1/igt@kms_flip@2x-modeset-vs-vblank-race-interruptible@ab-vga1-hdmi-a1.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-hsw6/igt@kms_flip@2x-modeset-vs-vblank-race-interruptible@ab-vga1-hdmi-a1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1:
    - shard-kbl:          [PASS][15] -> [DMESG-WARN][16] ([i915#180]) +5 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-kbl7/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-kbl1/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_flip@plain-flip-ts-check-interruptible@a-hdmi-a1:
    - shard-glk:          [PASS][17] -> [FAIL][18] ([i915#2122])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-glk5/igt@kms_flip@plain-flip-ts-check-interruptible@a-hdmi-a1.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-glk3/igt@kms_flip@plain-flip-ts-check-interruptible@a-hdmi-a1.html

  * igt@kms_frontbuffer_tracking@fbc-rgb565-draw-mmap-wc:
    - shard-kbl:          [PASS][19] -> [DMESG-WARN][20] ([i915#1982])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-kbl1/igt@kms_frontbuffer_tracking@fbc-rgb565-draw-mmap-wc.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-kbl6/igt@kms_frontbuffer_tracking@fbc-rgb565-draw-mmap-wc.html

  * igt@kms_frontbuffer_tracking@fbcpsr-rgb565-draw-mmap-cpu:
    - shard-tglb:         [PASS][21] -> [DMESG-WARN][22] ([i915#1982]) +3 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-tglb2/igt@kms_frontbuffer_tracking@fbcpsr-rgb565-draw-mmap-cpu.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-tglb3/igt@kms_frontbuffer_tracking@fbcpsr-rgb565-draw-mmap-cpu.html

  * igt@kms_plane@plane-panning-top-left-pipe-a-planes:
    - shard-iclb:         [PASS][23] -> [DMESG-WARN][24] ([i915#1982])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb2/igt@kms_plane@plane-panning-top-left-pipe-a-planes.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb5/igt@kms_plane@plane-panning-top-left-pipe-a-planes.html

  * igt@kms_psr@no_drrs:
    - shard-iclb:         [PASS][25] -> [FAIL][26] ([i915#173])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb5/igt@kms_psr@no_drrs.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb1/igt@kms_psr@no_drrs.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-iclb:         [PASS][27] -> [SKIP][28] ([fdo#109441]) +2 similar issues
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb2/igt@kms_psr@psr2_no_drrs.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb3/igt@kms_psr@psr2_no_drrs.html

  
#### Possible fixes ####

  * igt@gem_ctx_shared@q-smoketest-all:
    - shard-glk:          [DMESG-WARN][29] ([i915#118] / [i915#95]) -> [PASS][30] +1 similar issue
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-glk7/igt@gem_ctx_shared@q-smoketest-all.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-glk5/igt@gem_ctx_shared@q-smoketest-all.html

  * igt@gem_huc_copy@huc-copy:
    - shard-tglb:         [SKIP][31] ([i915#2190]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-tglb6/igt@gem_huc_copy@huc-copy.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-tglb8/igt@gem_huc_copy@huc-copy.html

  * igt@gem_mmap_gtt@fault-concurrent:
    - shard-glk:          [DMESG-WARN][33] ([i915#2165]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-glk9/igt@gem_mmap_gtt@fault-concurrent.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-glk8/igt@gem_mmap_gtt@fault-concurrent.html

  * igt@i915_pm_dc@dc6-dpms:
    - shard-iclb:         [FAIL][35] ([i915#454]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb1/igt@i915_pm_dc@dc6-dpms.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb6/igt@i915_pm_dc@dc6-dpms.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size:
    - shard-skl:          [DMESG-WARN][37] ([i915#1982]) -> [PASS][38] +11 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl8/igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl10/igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size.html

  * igt@kms_flip@flip-vs-expired-vblank@c-edp1:
    - shard-skl:          [FAIL][39] ([i915#79]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl9/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl9/igt@kms_flip@flip-vs-expired-vblank@c-edp1.html

  * igt@kms_flip@flip-vs-suspend@c-dp1:
    - shard-kbl:          [DMESG-WARN][41] ([i915#180]) -> [PASS][42] +4 similar issues
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-kbl4/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-kbl4/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * igt@kms_flip_tiling@flip-to-y-tiled:
    - shard-skl:          [FAIL][43] ([i915#167]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl10/igt@kms_flip_tiling@flip-to-y-tiled.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl6/igt@kms_flip_tiling@flip-to-y-tiled.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite:
    - shard-tglb:         [DMESG-WARN][45] ([i915#1982]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-tglb5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-tglb6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-render:
    - shard-skl:          [FAIL][47] ([i915#49]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl5/igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-render.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl8/igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-draw-render.html

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          [FAIL][49] ([fdo#108145] / [i915#265]) -> [PASS][50] +1 similar issue
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl8/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html

  * igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping:
    - shard-iclb:         [DMESG-WARN][51] ([i915#1982]) -> [PASS][52]
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb3/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb5/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [SKIP][53] ([fdo#109441]) -> [PASS][54] +1 similar issue
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb4/igt@kms_psr@psr2_cursor_render.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb2/igt@kms_psr@psr2_cursor_render.html

  * igt@kms_vblank@pipe-b-ts-continuation-modeset-hang:
    - shard-glk:          [DMESG-WARN][55] ([i915#1982]) -> [PASS][56]
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-glk6/igt@kms_vblank@pipe-b-ts-continuation-modeset-hang.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-glk8/igt@kms_vblank@pipe-b-ts-continuation-modeset-hang.html

  * igt@prime_busy@after@vecs0:
    - shard-hsw:          [FAIL][57] ([i915#2258]) -> [PASS][58] +1 similar issue
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-hsw7/igt@prime_busy@after@vecs0.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-hsw6/igt@prime_busy@after@vecs0.html

  
#### Warnings ####

  * igt@gem_exec_reloc@basic-spin-others@vcs0:
    - shard-snb:          [WARN][59] ([i915#2036]) -> [WARN][60] ([i915#2021])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-snb2/igt@gem_exec_reloc@basic-spin-others@vcs0.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-snb2/igt@gem_exec_reloc@basic-spin-others@vcs0.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][61] ([fdo#109349]) -> [DMESG-WARN][62] ([i915#1226])
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-iclb8/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-edp1:
    - shard-skl:          [DMESG-WARN][63] ([i915#1982]) -> [INCOMPLETE][64] ([i915#198])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-skl5/igt@kms_flip@flip-vs-suspend-interruptible@a-edp1.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-skl4/igt@kms_flip@flip-vs-suspend-interruptible@a-edp1.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-7efc:
    - shard-apl:          [DMESG-FAIL][65] ([fdo#108145] / [i915#1635] / [i915#1982]) -> [FAIL][66] ([fdo#108145] / [i915#1635] / [i915#265])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8882/shard-apl1/igt@kms_plane_alpha_blend@pipe-a-alpha-7efc.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/shard-apl8/igt@kms_plane_alpha_blend@pipe-a-alpha-7efc.html

  
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1226]: https://gitlab.freedesktop.org/drm/intel/issues/1226
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#167]: https://gitlab.freedesktop.org/drm/intel/issues/167
  [i915#173]: https://gitlab.freedesktop.org/drm/intel/issues/173
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2021]: https://gitlab.freedesktop.org/drm/intel/issues/2021
  [i915#2036]: https://gitlab.freedesktop.org/drm/intel/issues/2036
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2165]: https://gitlab.freedesktop.org/drm/intel/issues/2165
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2258]: https://gitlab.freedesktop.org/drm/intel/issues/2258
  [i915#2278]: https://gitlab.freedesktop.org/drm/intel/issues/2278
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#49]: https://gitlab.freedesktop.org/drm/intel/issues/49
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_8882 -> Patchwork_18358

  CI-20190529: 20190529
  CI_DRM_8882: bc285974fbc945765c176218aba7a003b687eea9 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5769: 4e5f76be680b65780204668e302026cf638decc9 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18358: 0e3d8f6ca520e6f9bef3fd038e99dd8e17581de3 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18358/index.html

[-- Attachment #1.2: Type: text/html, Size: 18530 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-08-14 15:57   ` [Intel-gfx] " Chris Wilson
@ 2020-08-14 18:07     ` Chang, Bruce
  -1 siblings, 0 replies; 27+ messages in thread
From: Chang, Bruce @ 2020-08-14 18:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala, stable

On 8/14/2020 8:57 AM, Chris Wilson wrote:
> On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> Forcibly evict stale csb entries") where, presumably, due to a missing
> Global Observation Point synchronisation, the write pointer of the CSB
> ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> we see the GPU report more context-switch entries for us to parse, but
> those entries have not been written, leading us to process stale events,
> and eventually report a hung GPU.
>
> However, this effect appears to be much more severe than we previously
> saw on Icelake (though it might be best if we try the same approach
> there as well and measure), and Bruce suggested the good idea of resetting
> the CSB entry after use so that we can detect when it has been updated by
> the GPU. By instrumenting how long that may be, we can set a reliable
> upper bound for how long we should wait for:
>
>      513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
>
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Bruce Chang <yu.bruce.chang@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: stable@vger.kernel.org # v5.4
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
>   1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index db982fc0f0bc..3b8161c6b601 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>    */
>   static inline bool gen12_csb_parse(const u64 *csb)
>   {
> -	u64 entry = READ_ONCE(*csb);
> -	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> -	bool new_queue =
> +	bool ctx_away_valid;
> +	bool new_queue;
> +	u64 entry;
> +
> +	/* XXX HSD */
> +	entry = READ_ONCE(*csb);
> +	if (unlikely(entry == -1)) {
> +		preempt_disable();
> +		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> +			GEM_WARN_ON("50us CSB timeout");

Our tests showed that 10us is not long enough, but 20us worked well. So 
50us should be good enough.

> +		preempt_enable();
> +	}
> +	WRITE_ONCE(*(u64 *)csb, -1);

A wmb() is probably needed here. It should be OK if the CSB is in SMEM, but 
in the case where the CSB is allocated in LMEM, the memory type will be WC, 
so the memory write (WRITE_ONCE) is potentially still in the write-combine 
buffer and not in any cache system, i.e., not visible to HW.

> +
> +	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> +	new_queue =
>   		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>   
>   	/*
> @@ -3995,6 +4008,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
>   	WRITE_ONCE(*execlists->csb_write, reset_value);
>   	wmb(); /* Make sure this is visible to HW (paranoia?) */
>   
> +	/* Check that the GPU does indeed update the CSB entries! */
> +	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
>   	invalidate_csb_entries(&execlists->csb_status[0],
>   			       &execlists->csb_status[reset_value]);
>   

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-08-14 18:07     ` Chang, Bruce
  0 siblings, 0 replies; 27+ messages in thread
From: Chang, Bruce @ 2020-08-14 18:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable

On 8/14/2020 8:57 AM, Chris Wilson wrote:
> On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> Forcibly evict stale csb entries") where, presumably, due to a missing
> Global Observation Point synchronisation, the write pointer of the CSB
> ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> we see the GPU report more context-switch entries for us to parse, but
> those entries have not been written, leading us to process stale events,
> and eventually report a hung GPU.
>
> However, this effect appears to be much more severe than we previously
> saw on Icelake (though it might be best if we try the same approach
> there as well and measure), and Bruce suggested the good idea of resetting
> the CSB entry after use so that we can detect when it has been updated by
> the GPU. By instrumenting how long that may be, we can set a reliable
> upper bound for how long we should wait for:
>
>      513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
>
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Bruce Chang <yu.bruce.chang@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: stable@vger.kernel.org # v5.4
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
>   1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index db982fc0f0bc..3b8161c6b601 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>    */
>   static inline bool gen12_csb_parse(const u64 *csb)
>   {
> -	u64 entry = READ_ONCE(*csb);
> -	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> -	bool new_queue =
> +	bool ctx_away_valid;
> +	bool new_queue;
> +	u64 entry;
> +
> +	/* XXX HSD */
> +	entry = READ_ONCE(*csb);
> +	if (unlikely(entry == -1)) {
> +		preempt_disable();
> +		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> +			GEM_WARN_ON("50us CSB timeout");

Our tests showed that 10us is not long enough, but 20us worked well. So 
50us should be good enough.

> +		preempt_enable();
> +	}
> +	WRITE_ONCE(*(u64 *)csb, -1);

A wmb() is probably needed here. It should be OK if the CSB is in SMEM, but 
in the case where the CSB is allocated in LMEM, the memory type will be WC, 
so the memory write (WRITE_ONCE) is potentially still in the write-combine 
buffer and not in any cache system, i.e., not visible to HW.

> +
> +	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> +	new_queue =
>   		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>   
>   	/*
> @@ -3995,6 +4008,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
>   	WRITE_ONCE(*execlists->csb_write, reset_value);
>   	wmb(); /* Make sure this is visible to HW (paranoia?) */
>   
> +	/* Check that the GPU does indeed update the CSB entries! */
> +	memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
>   	invalidate_csb_entries(&execlists->csb_status[0],
>   			       &execlists->csb_status[reset_value]);
>   
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
  2020-08-14 15:57 ` [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
@ 2020-08-14 18:18   ` Chang, Bruce
  2020-08-14 18:41   ` Mika Kuoppala
  1 sibling, 0 replies; 27+ messages in thread
From: Chang, Bruce @ 2020-08-14 18:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 8/14/2020 8:57 AM, Chris Wilson wrote:
> Since we expect to inline the csb_parse() routines, the w/a for the
> stale CSB data on Tigerlake will be pulled into process_csb(), and so we
> might as well simply reuse the logic for all, and so will hopefully
> avoid any strange behaviour on Icelake that was not covered by our
> previous w/a.
>
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Bruce Chang <yu.bruce.chang@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 70 +++++++++++++++++------------
>   1 file changed, 42 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 3b8161c6b601..c176a029f27b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>    *     bits 47-57: sw context id of the lrc the GT switched away from
>    *     bits 58-63: sw counter of the lrc the GT switched away from
>    */
> -static inline bool gen12_csb_parse(const u64 *csb)
> +static inline bool gen12_csb_parse(const u64 csb)
>   {
> -	bool ctx_away_valid;
> -	bool new_queue;
> -	u64 entry;
> -
> -	/* XXX HSD */
> -	entry = READ_ONCE(*csb);
> -	if (unlikely(entry == -1)) {
> -		preempt_disable();
> -		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> -			GEM_WARN_ON("50us CSB timeout");
> -		preempt_enable();
> -	}
> -	WRITE_ONCE(*(u64 *)csb, -1);
> -
> -	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> -	new_queue =
> -		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> +	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
> +	bool new_queue =
> +		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>   
>   	/*
>   	 * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
>   	 * would require some extra handling, but we don't support that.
>   	 */
>   	if (!ctx_away_valid || new_queue) {
> -		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
> +		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
>   		return true;
>   	}
>   
> @@ -2533,19 +2519,47 @@ static inline bool gen12_csb_parse(const u64 *csb)
>   	 * context switch on an unsuccessful wait instruction since we always
>   	 * use polling mode.
>   	 */
> -	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
> +	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
>   	return false;
>   }
>   
> -static inline bool gen8_csb_parse(const u64 *csb)
> +static inline bool gen8_csb_parse(const u64 csb)
> +{
> +	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +}
> +
> +static inline u64 csb_read(u64 * const csb)
>   {
> -	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +	u64 entry = READ_ONCE(*csb);
> +
> +	/*
> +	 * Unfortunately, the GPU does not always serialise its write
> +	 * of the CSB entries before its write of the CSB pointer, at least
> +	 * from the perspective of the CPU, using what is known as a Global
> +	 * Observation Point. We may read a new CSB tail pointer, but then
> +	 * read the stale CSB entries, causing us to misinterpret the
> +	 * context-switch events, and eventually declare the GPU hung.
> +	 *
> +	 * icl:HSDES#:1806554093
> +	 * tgl:XXX?
FYI: A HSD was also filed recently: HSD# 22011248461
> +	 */
> +	if (unlikely(entry == -1)) {
> +		preempt_disable();
> +		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> +			GEM_WARN_ON("50us CSB timeout");
> +		preempt_enable();
> +	}
> +
> +	/* Consume this entry so that we can spot its future reuse. */
> +	WRITE_ONCE(*csb, -1);
> +
> +	return entry;
>   }
>   
>   static void process_csb(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	const u64 * const buf = execlists->csb_status;
> +	u64 * const buf = execlists->csb_status;
>   	const u8 num_entries = execlists->csb_size;
>   	u8 head, tail;
>   
> @@ -2603,6 +2617,7 @@ static void process_csb(struct intel_engine_cs *engine)
>   	rmb();
>   	do {
>   		bool promote;
> +		u64 csb;
>   
>   		if (++head == num_entries)
>   			head = 0;
> @@ -2625,15 +2640,14 @@ static void process_csb(struct intel_engine_cs *engine)
>   		 * status notifier.
>   		 */
>   
> +		csb = csb_read(buf + head);
>   		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
> -			     head,
> -			     upper_32_bits(buf[head]),
> -			     lower_32_bits(buf[head]));
> +			     head, upper_32_bits(csb), lower_32_bits(csb));
Nice change! The original trace actually reads the CSB entry itself, so 
when the trace was enabled our issue went away, since that one extra read 
was enough to hide it.
>   
>   		if (INTEL_GEN(engine->i915) >= 12)
> -			promote = gen12_csb_parse(buf + head);
> +			promote = gen12_csb_parse(csb);
>   		else
> -			promote = gen8_csb_parse(buf + head);
> +			promote = gen8_csb_parse(csb);
>   		if (promote) {
>   			struct i915_request * const *old = execlists->active;
>   
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
                   ` (5 preceding siblings ...)
  2020-08-14 18:07 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
@ 2020-08-14 18:29 ` Mika Kuoppala
  2020-08-14 19:43   ` Chris Wilson
  6 siblings, 1 reply; 27+ messages in thread
From: Mika Kuoppala @ 2020-08-14 18:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> A CSB entry is 64b, and it is simpler for us to treat it as an array of
> 64b entries than as an array of pairs of 32b entries.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c          | 33 ++++++++++----------
>  2 files changed, 17 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index c400aaa2287b..ee6312601c56 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -278,7 +278,7 @@ struct intel_engine_execlists {
>  	 *
>  	 * Note these register may be either mmio or HWSP shadow.
>  	 */
> -	u32 *csb_status;
> +	u64 *csb_status;
>  
>  	/**
>  	 * @csb_size: context status buffer FIFO size
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 82742c6f423c..db982fc0f0bc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2464,7 +2464,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
>  }
>  
>  static inline void
> -invalidate_csb_entries(const u32 *first, const u32 *last)
> +invalidate_csb_entries(const u64 *first, const u64 *last)
>  {
>  	clflush((void *)first);
>  	clflush((void *)last);
> @@ -2496,14 +2496,12 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
>   *     bits 47-57: sw context id of the lrc the GT switched away from
>   *     bits 58-63: sw counter of the lrc the GT switched away from
>   */
> -static inline bool
> -gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> +static inline bool gen12_csb_parse(const u64 *csb)
>  {
> -	u32 lower_dw = csb[0];
> -	u32 upper_dw = csb[1];
> -	bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
> -	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
> -	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> +	u64 entry = READ_ONCE(*csb);
> +	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> +	bool new_queue =
> +		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;

Opportunity to constify, tho stylistic.

I have felt the urge to do this for a long time, but back then the gains
were not this clear.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  
>  	/*
>  	 * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -2513,7 +2511,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  	 * would require some extra handling, but we don't support that.
>  	 */
>  	if (!ctx_away_valid || new_queue) {
> -		GEM_BUG_ON(!ctx_to_valid);
> +		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
>  		return true;
>  	}
>  
> @@ -2522,12 +2520,11 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  	 * context switch on an unsuccessful wait instruction since we always
>  	 * use polling mode.
>  	 */
> -	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
> +	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
>  	return false;
>  }
>  
> -static inline bool
> -gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> +static inline bool gen8_csb_parse(const u64 *csb)
>  {
>  	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
>  }
> @@ -2535,7 +2532,7 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  static void process_csb(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	const u32 * const buf = execlists->csb_status;
> +	const u64 * const buf = execlists->csb_status;
>  	const u8 num_entries = execlists->csb_size;
>  	u8 head, tail;
>  
> @@ -2616,12 +2613,14 @@ static void process_csb(struct intel_engine_cs *engine)
>  		 */
>  
>  		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
> -			     head, buf[2 * head + 0], buf[2 * head + 1]);
> +			     head,
> +			     upper_32_bits(buf[head]),
> +			     lower_32_bits(buf[head]));
>  
>  		if (INTEL_GEN(engine->i915) >= 12)
> -			promote = gen12_csb_parse(execlists, buf + 2 * head);
> +			promote = gen12_csb_parse(buf + head);
>  		else
> -			promote = gen8_csb_parse(execlists, buf + 2 * head);
> +			promote = gen8_csb_parse(buf + head);
>  		if (promote) {
>  			struct i915_request * const *old = execlists->active;
>  
> @@ -5148,7 +5147,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>  	}
>  
>  	execlists->csb_status =
> -		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
> +		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
>  
>  	execlists->csb_write =
>  		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-08-14 18:07     ` [Intel-gfx] " Chang, Bruce
@ 2020-08-14 18:38       ` Chris Wilson
  -1 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 18:38 UTC (permalink / raw)
  To: Chang, Bruce, intel-gfx; +Cc: stable

Quoting Chang, Bruce (2020-08-14 19:07:53)
> On 8/14/2020 8:57 AM, Chris Wilson wrote:
> > On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> > Forcibly evict stale csb entries") where, presumably, due to a missing
> > Global Observation Point synchronisation, the write pointer of the CSB
> > ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> > we see the GPU report more context-switch entries for us to parse, but
> > those entries have not been written, leading us to process stale events,
> > and eventually report a hung GPU.
> >
> > However, this effect appears to be much more severe than we previously
> > saw on Icelake (though it might be best if we try the same approach
> > there as well and measure), and Bruce suggested the good idea of resetting
> > the CSB entry after use so that we can detect when it has been updated by
> > the GPU. By instrumenting how long that may be, we can set a reliable
> > upper bound for how long we should wait for:
> >
> >      513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> >
> > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> > References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> > Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Bruce Chang <yu.bruce.chang@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: stable@vger.kernel.org # v5.4
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
> >   1 file changed, 18 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index db982fc0f0bc..3b8161c6b601 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
> >    */
> >   static inline bool gen12_csb_parse(const u64 *csb)
> >   {
> > -     u64 entry = READ_ONCE(*csb);
> > -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> > -     bool new_queue =
> > +     bool ctx_away_valid;
> > +     bool new_queue;
> > +     u64 entry;
> > +
> > +     /* XXX HSD */
> > +     entry = READ_ONCE(*csb);
> > +     if (unlikely(entry == -1)) {
> > +             preempt_disable();
> > +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> > +                     GEM_WARN_ON("50us CSB timeout");
> 
> Our tests showed that 10us is not long enough, but 20us worked well. So 
> 50us should be good enough.
> 
> > +             preempt_enable();
> > +     }
> > +     WRITE_ONCE(*(u64 *)csb, -1);
> 
> A wmb() is probably needed here. it should be ok if CSB is in SMEM, but 
> in the case CSB is allocated in LMEM, the memory type will be WC, so the 
> memory write (WRITE_ONCE) is potentially still in the write combine 
> buffer and not in any cache system, i.e., not visible to HW.

There's a trick here. Before the GPU can wrap the CSB ringbuffer, there
must be a register write that itself will flush the WCB. Not only that,
we will have an explicit wmb() prior to that register write. Sneaky.

We probably want to avoid putting the HWSP into WC and fix whatever
cache snooping is required. At least to the point of being able to
measure the impact as we read from HWSP (and "HWSP") frequently.
-Chris

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-08-14 18:38       ` Chris Wilson
  0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 18:38 UTC (permalink / raw)
  To: Chang, Bruce, intel-gfx; +Cc: stable

Quoting Chang, Bruce (2020-08-14 19:07:53)
> On 8/14/2020 8:57 AM, Chris Wilson wrote:
> > On Tigerlake, we are seeing a repeat of commit d8f505311717 ("drm/i915/icl:
> > Forcibly evict stale csb entries") where, presumably, due to a missing
> > Global Observation Point synchronisation, the write pointer of the CSB
> > ringbuffer is updated _prior_ to the contents of the ringbuffer. That is
> > we see the GPU report more context-switch entries for us to parse, but
> > those entries have not been written, leading us to process stale events,
> > and eventually report a hung GPU.
> >
> > However, this effect appears to be much more severe than we previously
> > saw on Icelake (though it might be best if we try the same approach
> > there as well and measure), and Bruce suggested the good idea of resetting
> > the CSB entry after use so that we can detect when it has been updated by
> > the GPU. By instrumenting how long that may be, we can set a reliable
> > upper bound for how long we should wait for:
> >
> >      513 late, avg of 61 retries (590 ns), max of 1061 retries (10099 ns)
> >
> > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2045
> > References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> > Suggested-by: Bruce Chang <yu.bruce.chang@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Bruce Chang <yu.bruce.chang@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: stable@vger.kernel.org # v5.4
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c | 21 ++++++++++++++++++---
> >   1 file changed, 18 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index db982fc0f0bc..3b8161c6b601 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
> >    */
> >   static inline bool gen12_csb_parse(const u64 *csb)
> >   {
> > -     u64 entry = READ_ONCE(*csb);
> > -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> > -     bool new_queue =
> > +     bool ctx_away_valid;
> > +     bool new_queue;
> > +     u64 entry;
> > +
> > +     /* XXX HSD */
> > +     entry = READ_ONCE(*csb);
> > +     if (unlikely(entry == -1)) {
> > +             preempt_disable();
> > +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> > +                     GEM_WARN_ON("50us CSB timeout");
> 
> Our tests showed that 10us is not long enough, but 20us worked well. So 
> 50us should be good enough.
> 
> > +             preempt_enable();
> > +     }
> > +     WRITE_ONCE(*(u64 *)csb, -1);
> 
> A wmb() is probably needed here. it should be ok if CSB is in SMEM, but 
> in the case CSB is allocated in LMEM, the memory type will be WC, so the 
> memory write (WRITE_ONCE) is potentially still in the write combine 
> buffer and not in any cache system, i.e., not visible to HW.

There's a trick here. Before the GPU can wrap the CSB ringbuffer, there
must be a register write that itself will flush the WCB. Not only that,
we will have an explicit wmb() prior to that register write. Sneaky.

We probably want to avoid putting the HWSP into WC and fix whatever
cache snooping is required. At least to the point of being able to
measure the impact as we read from HWSP (and "HWSP") frequently.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
  2020-08-14 15:57 ` [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
  2020-08-14 18:18   ` Chang, Bruce
@ 2020-08-14 18:41   ` Mika Kuoppala
  2020-08-14 19:41     ` Chris Wilson
  1 sibling, 1 reply; 27+ messages in thread
From: Mika Kuoppala @ 2020-08-14 18:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we expect to inline the csb_parse() routines, the w/a for the
> stale CSB data on Tigerlake will be pulled into process_csb(), and so we
> might as well simply reuse the logic for all, and so will hopefully
> avoid any strange behaviour on Icelake that was not covered by our
> previous w/a.
>
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Bruce Chang <yu.bruce.chang@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 70 +++++++++++++++++------------
>  1 file changed, 42 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 3b8161c6b601..c176a029f27b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>   *     bits 47-57: sw context id of the lrc the GT switched away from
>   *     bits 58-63: sw counter of the lrc the GT switched away from
>   */
> -static inline bool gen12_csb_parse(const u64 *csb)
> +static inline bool gen12_csb_parse(const u64 csb)
>  {
> -	bool ctx_away_valid;
> -	bool new_queue;
> -	u64 entry;
> -
> -	/* XXX HSD */
> -	entry = READ_ONCE(*csb);
> -	if (unlikely(entry == -1)) {
> -		preempt_disable();
> -		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))

If we get this deep into desperation, should we start to apply more
pressure? I.e., rmb() instead of just instructing the compiler. And we could
also start to invalidate the entry, which is obviously of no use.

It could even be that the invalidate pays off, as the correct value
bubbles through the hierarchy faster?

Weird.

*shrug*

-Mika

> -			GEM_WARN_ON("50us CSB timeout");
> -		preempt_enable();
> -	}
> -	WRITE_ONCE(*(u64 *)csb, -1);
> -
> -	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> -	new_queue =
> -		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> +	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
> +	bool new_queue =
> +		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>  
>  	/*
>  	 * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
>  	 * would require some extra handling, but we don't support that.
>  	 */
>  	if (!ctx_away_valid || new_queue) {
> -		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
> +		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
>  		return true;
>  	}
>  
> @@ -2533,19 +2519,47 @@ static inline bool gen12_csb_parse(const u64 *csb)
>  	 * context switch on an unsuccessful wait instruction since we always
>  	 * use polling mode.
>  	 */
> -	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
> +	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
>  	return false;
>  }
>  
> -static inline bool gen8_csb_parse(const u64 *csb)
> +static inline bool gen8_csb_parse(const u64 csb)
> +{
> +	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +}
> +
> +static inline u64 csb_read(u64 * const csb)
>  {
> -	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +	u64 entry = READ_ONCE(*csb);
> +
> +	/*
> +	 * Unfortunately, the GPU does not always serialise its write
> +	 * of the CSB entries before its write of the CSB pointer, at least
> +	 * from the perspective of the CPU, using what is known as a Global
> +	 * Observation Point. We may read a new CSB tail pointer, but then
> +	 * read the stale CSB entries, causing us to misinterpret the
> +	 * context-switch events, and eventually declare the GPU hung.
> +	 *
> +	 * icl:HSDES#:1806554093
> +	 * tgl:XXX?
> +	 */
> +	if (unlikely(entry == -1)) {
> +		preempt_disable();
> +		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> +			GEM_WARN_ON("50us CSB timeout");
> +		preempt_enable();
> +	}
> +
> +	/* Consume this entry so that we can spot its future reuse. */
> +	WRITE_ONCE(*csb, -1);
> +
> +	return entry;
>  }
>  
>  static void process_csb(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	const u64 * const buf = execlists->csb_status;
> +	u64 * const buf = execlists->csb_status;
>  	const u8 num_entries = execlists->csb_size;
>  	u8 head, tail;
>  
> @@ -2603,6 +2617,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  	rmb();
>  	do {
>  		bool promote;
> +		u64 csb;
>  
>  		if (++head == num_entries)
>  			head = 0;
> @@ -2625,15 +2640,14 @@ static void process_csb(struct intel_engine_cs *engine)
>  		 * status notifier.
>  		 */
>  
> +		csb = csb_read(buf + head);
>  		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
> -			     head,
> -			     upper_32_bits(buf[head]),
> -			     lower_32_bits(buf[head]));
> +			     head, upper_32_bits(csb), lower_32_bits(csb));
>  
>  		if (INTEL_GEN(engine->i915) >= 12)
> -			promote = gen12_csb_parse(buf + head);
> +			promote = gen12_csb_parse(csb);
>  		else
> -			promote = gen8_csb_parse(buf + head);
> +			promote = gen8_csb_parse(csb);
>  		if (promote) {
>  			struct i915_request * const *old = execlists->active;
>  
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
  2020-08-14 18:41   ` Mika Kuoppala
@ 2020-08-14 19:41     ` Chris Wilson
  2020-08-14 20:18       ` Chris Wilson
  0 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 19:41 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-08-14 19:41:14)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Since we expect to inline the csb_parse() routines, the w/a for the
> > stale CSB data on Tigerlake will be pulled into process_csb(), and so we
> > might as well simply reuse the logic for all, and so will hopefully
> > avoid any strange behaviour on Icelake that was not covered by our
> > previous w/a.
> >
> > References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Bruce Chang <yu.bruce.chang@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_lrc.c | 70 +++++++++++++++++------------
> >  1 file changed, 42 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 3b8161c6b601..c176a029f27b 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
> >   *     bits 47-57: sw context id of the lrc the GT switched away from
> >   *     bits 58-63: sw counter of the lrc the GT switched away from
> >   */
> > -static inline bool gen12_csb_parse(const u64 *csb)
> > +static inline bool gen12_csb_parse(const u64 csb)
> >  {
> > -     bool ctx_away_valid;
> > -     bool new_queue;
> > -     u64 entry;
> > -
> > -     /* XXX HSD */
> > -     entry = READ_ONCE(*csb);
> > -     if (unlikely(entry == -1)) {
> > -             preempt_disable();
> > -             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> 
> > If we get this deep into desperation, should we start to apply more
> > pressure? I.e., rmb() instead of just instructing the compiler. And we could
> > also start to invalidate the entry, which is obviously of no use.

I had a rmb() here; removing it did not appear to make any difference
whatsoever to the average delay. The extreme case would be a full
mb(); clflush(); mb() read. I haven't timed the average for that....
 
> It could even be that the invalidate pays off, as the correct value
> bubbles through the hierarchy faster?

I had the same thought... But atm my feeling is the issue is not on the
CPU side (or at least controllable from our code on the CPU).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers
  2020-08-14 18:29 ` [Intel-gfx] [PATCH 1/3] " Mika Kuoppala
@ 2020-08-14 19:43   ` Chris Wilson
  0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 19:43 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-08-14 19:29:03)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > A CSB entry is 64b, and it is simpler for us to treat it as an array of
> > 64b entries than as an array of pairs of 32b entries.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
> >  drivers/gpu/drm/i915/gt/intel_lrc.c          | 33 ++++++++++----------
> >  2 files changed, 17 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index c400aaa2287b..ee6312601c56 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -278,7 +278,7 @@ struct intel_engine_execlists {
> >        *
> >        * Note these register may be either mmio or HWSP shadow.
> >        */
> > -     u32 *csb_status;
> > +     u64 *csb_status;
> >  
> >       /**
> >        * @csb_size: context status buffer FIFO size
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 82742c6f423c..db982fc0f0bc 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2464,7 +2464,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
> >  }
> >  
> >  static inline void
> > -invalidate_csb_entries(const u32 *first, const u32 *last)
> > +invalidate_csb_entries(const u64 *first, const u64 *last)
> >  {
> >       clflush((void *)first);
> >       clflush((void *)last);
> > @@ -2496,14 +2496,12 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
> >   *     bits 47-57: sw context id of the lrc the GT switched away from
> >   *     bits 58-63: sw counter of the lrc the GT switched away from
> >   */
> > -static inline bool
> > -gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> > +static inline bool gen12_csb_parse(const u64 *csb)
> >  {
> > -     u32 lower_dw = csb[0];
> > -     u32 upper_dw = csb[1];
> > -     bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
> > -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
> > -     bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> > +     u64 entry = READ_ONCE(*csb);
> > +     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> > +     bool new_queue =
> > +             lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> 
> Opportunity to constify, tho stylistic.

Opportunity lost in the next patch, found again in the 3rd patch. If you
get really fancy, we only use them once. gcc is already smart enough to
reduce the pair down to a trivial set of bit ops rather than conditions.
So I left it alone.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
  2020-08-14 19:41     ` Chris Wilson
@ 2020-08-14 20:18       ` Chris Wilson
  2020-08-17  9:02         ` Mika Kuoppala
  0 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2020-08-14 20:18 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Chris Wilson (2020-08-14 20:41:32)
> Quoting Mika Kuoppala (2020-08-14 19:41:14)
> > Chris Wilson <chris@chris-wilson.co.uk> writes:
> > > -     entry = READ_ONCE(*csb);
> > > -     if (unlikely(entry == -1)) {
> > > -             preempt_disable();
> > > -             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> > 
> > > If we get this deep into desperation, should we start to apply more
> > > pressure? I.e., rmb() instead of just instructing the compiler. And we could
> > > also start to invalidate the entry, which is obviously of no use.
> 
> I had a rmb() here; removing it did not appear to make any difference
> whatsoever to the average delay. The extreme case would be a full
> mb(); clflush(); mb() read. I haven't timed the average for that....

+static inline u64 __csb_read(u64 *csb)
+{
+       mb();
+       clflush(csb);
+       mb();
+
+       return READ_ONCE(*csb);
+}

[ 1554.274204] csb: 1793 misses, avg 475ns, max 14727ns

So no better on average or at worst.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-08-14 18:38       ` Chris Wilson
@ 2020-08-15  0:36         ` Chang, Bruce
  -1 siblings, 0 replies; 27+ messages in thread
From: Chang, Bruce @ 2020-08-15  0:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


>>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>>>     */
>>>    static inline bool gen12_csb_parse(const u64 *csb)
>>>    {
>>> -     u64 entry = READ_ONCE(*csb);
>>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
>>> -     bool new_queue =
>>> +     bool ctx_away_valid;
>>> +     bool new_queue;
>>> +     u64 entry;
>>> +
>>> +     /* XXX HSD */
>>> +     entry = READ_ONCE(*csb);
>>> +     if (unlikely(entry == -1)) {
>>> +             preempt_disable();
>>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
>>> +                     GEM_WARN_ON("50us CSB timeout");
>> Our tests showed that 10us is not long enough, but 20us worked well. So
>> 50us should be good enough.

Just realized this may not fully work, as one of the common issues we run
into is that the upper 32 bits are updated by the HW, but the lower 32 bits
are updated at a later time, meaning the CSB will read like
0xFFFFFFFF:xxxxxxxx (low:high). So this check (!= -1) can still pass, but
with a partially invalid CSB status. So, we may need to check each 32-bit
half separately.

>>> +             preempt_enable();
>>> +     }
>>> +     WRITE_ONCE(*(u64 *)csb, -1);
>> A wmb() is probably needed here. it should be ok if CSB is in SMEM, but
>> in the case CSB is allocated in LMEM, the memory type will be WC, so the
>> memory write (WRITE_ONCE) is potentially still in the write combine
>> buffer and not in any cache system, i.e., not visible to HW.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-08-15  0:36         ` Chang, Bruce
  0 siblings, 0 replies; 27+ messages in thread
From: Chang, Bruce @ 2020-08-15  0:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


>>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>>>     */
>>>    static inline bool gen12_csb_parse(const u64 *csb)
>>>    {
>>> -     u64 entry = READ_ONCE(*csb);
>>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
>>> -     bool new_queue =
>>> +     bool ctx_away_valid;
>>> +     bool new_queue;
>>> +     u64 entry;
>>> +
>>> +     /* XXX HSD */
>>> +     entry = READ_ONCE(*csb);
>>> +     if (unlikely(entry == -1)) {
>>> +             preempt_disable();
>>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
>>> +                     GEM_WARN_ON("50us CSB timeout");
>> Our tests showed that 10us is not long enough, but 20us worked well. So
>> 50us should be good enough.

Just realized this may not fully work, as one of the common issues we run
into is that the upper 32 bits are updated by the HW, but the lower 32 bits
are updated at a later time, meaning the CSB will read like
0xFFFFFFFF:xxxxxxxx (low:high). So this check (!= -1) can still pass, but
with a partially invalid CSB status. So, we may need to check each 32-bit
half separately.

>>> +             preempt_enable();
>>> +     }
>>> +     WRITE_ONCE(*(u64 *)csb, -1);
>> A wmb() is probably needed here. it should be ok if CSB is in SMEM, but
>> in the case CSB is allocated in LMEM, the memory type will be WC, so the
>> memory write (WRITE_ONCE) is potentially still in the write combine
>> buffer and not in any cache system, i.e., not visible to HW.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-08-15  0:36         ` Chang, Bruce
@ 2020-08-15  2:16           ` Chang, Bruce
  -1 siblings, 0 replies; 27+ messages in thread
From: Chang, Bruce @ 2020-08-15  2:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable

On 8/14/2020 5:36 PM, Chang, Bruce wrote:
>
>>>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, 
>>>> const u64 *last)
>>>>     */
>>>>    static inline bool gen12_csb_parse(const u64 *csb)
>>>>    {
>>>> -     u64 entry = READ_ONCE(*csb);
>>>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
>>>> -     bool new_queue =
>>>> +     bool ctx_away_valid;
>>>> +     bool new_queue;
>>>> +     u64 entry;
>>>> +
>>>> +     /* XXX HSD */
>>>> +     entry = READ_ONCE(*csb);
>>>> +     if (unlikely(entry == -1)) {
>>>> +             preempt_disable();
>>>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != 
>>>> -1, 50))
>>>> +                     GEM_WARN_ON("50us CSB timeout");
>>> Out tests showed that 10us is not long enough, but 20us worked well. So
>>> 50us should be good enough.
>
> Just realized this may not fully work, as one of the common issue we 
> run into is that higher 32bit is updated from the HW, but lower 32bit 
> update at a later time: meaning the csb will read like 
> 0xFFFFFFFF:xxxxxxxx (low:high) . So this check (!= -1) can still pass 
> but with a partial invalid csb status. So, we may need to check each 
> 32bit separately.
>
After testing, with the new 64-bit read the above issue has not happened
so far. So, it seems this is only applicable to 32-bit reads (the CSB is
updated between the two lower and higher 32-bit reads). Assuming the HW
64-bit CSB update is also atomic, the above code should be fine.

>>>> +             preempt_enable();
>>>> +     }
>>>> +     WRITE_ONCE(*(u64 *)csb, -1);
>>> A wmb() is probably needed here. it should be ok if CSB is in SMEM, but
>>> in the case CSB is allocated in LMEM, the memory type will be WC, so 
>>> the
>>> memory write (WRITE_ONCE) is potentially still in the write combine
>>> buffer and not in any cache system, i.e., not visible to HW.
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-08-15  2:16           ` Chang, Bruce
  0 siblings, 0 replies; 27+ messages in thread
From: Chang, Bruce @ 2020-08-15  2:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable

On 8/14/2020 5:36 PM, Chang, Bruce wrote:
>
>>>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, 
>>>> const u64 *last)
>>>>     */
>>>>    static inline bool gen12_csb_parse(const u64 *csb)
>>>>    {
>>>> -     u64 entry = READ_ONCE(*csb);
>>>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
>>>> -     bool new_queue =
>>>> +     bool ctx_away_valid;
>>>> +     bool new_queue;
>>>> +     u64 entry;
>>>> +
>>>> +     /* XXX HSD */
>>>> +     entry = READ_ONCE(*csb);
>>>> +     if (unlikely(entry == -1)) {
>>>> +             preempt_disable();
>>>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != 
>>>> -1, 50))
>>>> +                     GEM_WARN_ON("50us CSB timeout");
>>> Out tests showed that 10us is not long enough, but 20us worked well. So
>>> 50us should be good enough.
>
> Just realized this may not fully work, as one of the common issue we 
> run into is that higher 32bit is updated from the HW, but lower 32bit 
> update at a later time: meaning the csb will read like 
> 0xFFFFFFFF:xxxxxxxx (low:high) . So this check (!= -1) can still pass 
> but with a partial invalid csb status. So, we may need to check each 
> 32bit separately.
>
After testing, with the new 64-bit read the above issue has not happened
so far. So, it seems this is only applicable to 32-bit reads (the CSB is
updated between the two lower and higher 32-bit reads). Assuming the HW
64-bit CSB update is also atomic, the above code should be fine.

>>>> +             preempt_enable();
>>>> +     }
>>>> +     WRITE_ONCE(*(u64 *)csb, -1);
>>> A wmb() is probably needed here. it should be ok if CSB is in SMEM, but
>>> in the case CSB is allocated in LMEM, the memory type will be WC, so 
>>> the
>>> memory write (WRITE_ONCE) is potentially still in the write combine
>>> buffer and not in any cache system, i.e., not visible to HW.
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-08-15  0:36         ` Chang, Bruce
@ 2020-08-15  9:53           ` Chris Wilson
  -1 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-15  9:53 UTC (permalink / raw)
  To: Chang, Bruce, intel-gfx; +Cc: stable

Quoting Chang, Bruce (2020-08-15 01:36:10)
> 
> >>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
> >>>     */
> >>>    static inline bool gen12_csb_parse(const u64 *csb)
> >>>    {
> >>> -     u64 entry = READ_ONCE(*csb);
> >>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> >>> -     bool new_queue =
> >>> +     bool ctx_away_valid;
> >>> +     bool new_queue;
> >>> +     u64 entry;
> >>> +
> >>> +     /* XXX HSD */
> >>> +     entry = READ_ONCE(*csb);
> >>> +     if (unlikely(entry == -1)) {
> >>> +             preempt_disable();
> >>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> >>> +                     GEM_WARN_ON("50us CSB timeout");
> >> Out tests showed that 10us is not long enough, but 20us worked well. So
> >> 50us should be good enough.
> 
> Just realized this may not fully work, as one of the common issue we run 
> into is that higher 32bit is updated from the HW, but lower 32bit update 
> at a later time: meaning the csb will read like 0xFFFFFFFF:xxxxxxxx 
> (low:high) . So this check (!= -1) can still pass but with a partial 
> invalid csb status. So, we may need to check each 32bit separately.

Hence the transformation to use u64 as the entry type :)
-Chris

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-08-15  9:53           ` Chris Wilson
  0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-15  9:53 UTC (permalink / raw)
  To: Chang, Bruce, intel-gfx; +Cc: stable

Quoting Chang, Bruce (2020-08-15 01:36:10)
> 
> >>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
> >>>     */
> >>>    static inline bool gen12_csb_parse(const u64 *csb)
> >>>    {
> >>> -     u64 entry = READ_ONCE(*csb);
> >>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> >>> -     bool new_queue =
> >>> +     bool ctx_away_valid;
> >>> +     bool new_queue;
> >>> +     u64 entry;
> >>> +
> >>> +     /* XXX HSD */
> >>> +     entry = READ_ONCE(*csb);
> >>> +     if (unlikely(entry == -1)) {
> >>> +             preempt_disable();
> >>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> >>> +                     GEM_WARN_ON("50us CSB timeout");
> >> Out tests showed that 10us is not long enough, but 20us worked well. So
> >> 50us should be good enough.
> 
> Just realized this may not fully work, as one of the common issue we run 
> into is that higher 32bit is updated from the HW, but lower 32bit update 
> at a later time: meaning the csb will read like 0xFFFFFFFF:xxxxxxxx 
> (low:high) . So this check (!= -1) can still pass but with a partial 
> invalid csb status. So, we may need to check each 32bit separately.

Hence the transformation to use u64 as the entry type :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
  2020-08-15  2:16           ` Chang, Bruce
@ 2020-08-15  9:59             ` Chris Wilson
  -1 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-15  9:59 UTC (permalink / raw)
  To: Chang, Bruce, intel-gfx; +Cc: stable

Quoting Chang, Bruce (2020-08-15 03:16:58)
> On 8/14/2020 5:36 PM, Chang, Bruce wrote:
> >
> >>>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, 
> >>>> const u64 *last)
> >>>>     */
> >>>>    static inline bool gen12_csb_parse(const u64 *csb)
> >>>>    {
> >>>> -     u64 entry = READ_ONCE(*csb);
> >>>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> >>>> -     bool new_queue =
> >>>> +     bool ctx_away_valid;
> >>>> +     bool new_queue;
> >>>> +     u64 entry;
> >>>> +
> >>>> +     /* XXX HSD */
> >>>> +     entry = READ_ONCE(*csb);
> >>>> +     if (unlikely(entry == -1)) {
> >>>> +             preempt_disable();
> >>>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != 
> >>>> -1, 50))
> >>>> +                     GEM_WARN_ON("50us CSB timeout");
> >>> Out tests showed that 10us is not long enough, but 20us worked well. So
> >>> 50us should be good enough.
> >
> > Just realized this may not fully work, as one of the common issue we 
> > run into is that higher 32bit is updated from the HW, but lower 32bit 
> > update at a later time: meaning the csb will read like 
> > 0xFFFFFFFF:xxxxxxxx (low:high) . So this check (!= -1) can still pass 
> > but with a partial invalid csb status. So, we may need to check each 
> > 32bit separately.
> >
> After tested, with the new 64bit read, the above issue never happened so 
> far. So, it seems this only applicable to 32bit read (CSB updated 
> between the two lower and high 32bit reads). Assuming the HW 64bit CSB 
> update is also atomic, the above code should be fine.

Fortunately for all the platforms we care about here, READ_ONCE(u64)
will be a single 64b read and so both lower/upper dwords will be pulled
from the same bus transfer. We really need a compiler warning for when
READ_ONCE() is not a singular atomic operation. atomic64_t has too much
connotation with cross-core atomicity for my liking when dealing with
[cacheable] mmio semantics.
-Chris

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake
@ 2020-08-15  9:59             ` Chris Wilson
  0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2020-08-15  9:59 UTC (permalink / raw)
  To: Chang, Bruce, intel-gfx; +Cc: stable

Quoting Chang, Bruce (2020-08-15 03:16:58)
> On 8/14/2020 5:36 PM, Chang, Bruce wrote:
> >
> >>>> @@ -2498,9 +2498,22 @@ invalidate_csb_entries(const u64 *first, 
> >>>> const u64 *last)
> >>>>     */
> >>>>    static inline bool gen12_csb_parse(const u64 *csb)
> >>>>    {
> >>>> -     u64 entry = READ_ONCE(*csb);
> >>>> -     bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> >>>> -     bool new_queue =
> >>>> +     bool ctx_away_valid;
> >>>> +     bool new_queue;
> >>>> +     u64 entry;
> >>>> +
> >>>> +     /* XXX HSD */
> >>>> +     entry = READ_ONCE(*csb);
> >>>> +     if (unlikely(entry == -1)) {
> >>>> +             preempt_disable();
> >>>> +             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != 
> >>>> -1, 50))
> >>>> +                     GEM_WARN_ON("50us CSB timeout");
> >>> Out tests showed that 10us is not long enough, but 20us worked well. So
> >>> 50us should be good enough.
> >
> > Just realized this may not fully work, as one of the common issue we 
> > run into is that higher 32bit is updated from the HW, but lower 32bit 
> > update at a later time: meaning the csb will read like 
> > 0xFFFFFFFF:xxxxxxxx (low:high) . So this check (!= -1) can still pass 
> > but with a partial invalid csb status. So, we may need to check each 
> > 32bit separately.
> >
> After tested, with the new 64bit read, the above issue never happened so 
> far. So, it seems this only applicable to 32bit read (CSB updated 
> between the two lower and high 32bit reads). Assuming the HW 64bit CSB 
> update is also atomic, the above code should be fine.

Fortunately for all the platforms we care about here, READ_ONCE(u64)
will be a single 64b read and so both lower/upper dwords will be pulled
from the same bus transfer. We really need a compiler warning for when
READ_ONCE() is not a singular atomic operation. atomic64_t has too much
connotation with cross-core atomicity for my liking when dealing with
[cacheable] mmio semantics.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
  2020-08-14 20:18       ` Chris Wilson
@ 2020-08-17  9:02         ` Mika Kuoppala
  0 siblings, 0 replies; 27+ messages in thread
From: Mika Kuoppala @ 2020-08-17  9:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Chris Wilson (2020-08-14 20:41:32)
>> Quoting Mika Kuoppala (2020-08-14 19:41:14)
>> > Chris Wilson <chris@chris-wilson.co.uk> writes:
>> > > -     entry = READ_ONCE(*csb);
>> > > -     if (unlikely(entry == -1)) {
>> > > -             preempt_disable();
>> > > -             if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
>> > 
>> > If we get this deep into desperation, should we start to apply more
>> > pressure. Ie, rmb instead of just instructing the compiler. And could also
>> > start to invalidate the entry which obviously if of no use.
>> 
>> I had a rmb() here; removing it did not appear to make any difference
>> whatsoever to the average delay. The extreme case would be a full
>> mb(); clflush(); mb() read. I haven't timed the average for that....
>
> +static inline u64 __csb_read(u64 *csb)
> +{
> +       mb();
> +       clflush(csb);
> +       mb();
> +
> +       return READ_ONCE(*csb);
> +}
>
> [ 1554.274204] csb: 1793 misses, avg 475ns, max 14727ns
>
> So no better on average or at worst.

Well thanks for trying it out. We can attach it to hsdes as
a note that there is not much to be done on cpu side :O
-Mika

> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2020-08-17  9:03 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-14 15:57 [Intel-gfx] [PATCH 1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Chris Wilson
2020-08-14 15:57 ` [PATCH 2/3] drm/i915/gt: Wait for CSB entries on Tigerlake Chris Wilson
2020-08-14 15:57   ` [Intel-gfx] " Chris Wilson
2020-08-14 18:07   ` Chang, Bruce
2020-08-14 18:07     ` [Intel-gfx] " Chang, Bruce
2020-08-14 18:38     ` Chris Wilson
2020-08-14 18:38       ` Chris Wilson
2020-08-15  0:36       ` Chang, Bruce
2020-08-15  0:36         ` Chang, Bruce
2020-08-15  2:16         ` Chang, Bruce
2020-08-15  2:16           ` Chang, Bruce
2020-08-15  9:59           ` Chris Wilson
2020-08-15  9:59             ` Chris Wilson
2020-08-15  9:53         ` Chris Wilson
2020-08-15  9:53           ` Chris Wilson
2020-08-14 15:57 ` [Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all Chris Wilson
2020-08-14 18:18   ` Chang, Bruce
2020-08-14 18:41   ` Mika Kuoppala
2020-08-14 19:41     ` Chris Wilson
2020-08-14 20:18       ` Chris Wilson
2020-08-17  9:02         ` Mika Kuoppala
2020-08-14 16:15 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/gt: Widen CSB pointer to u64 for the parsers Patchwork
2020-08-14 16:16 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-08-14 16:34 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-08-14 18:07 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2020-08-14 18:29 ` [Intel-gfx] [PATCH 1/3] " Mika Kuoppala
2020-08-14 19:43   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.