[PATCH 1/2] drm/i915: Cancel reset preparations on failed resets

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets
@ 2018-05-30 15:02 Mika Kuoppala
  2018-05-30 15:02 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Mika Kuoppala @ 2018-05-30 15:02 UTC (permalink / raw)
  To: intel-gfx

Our reset handling has a retry layer further up in the
chain. As we have told the engine to prepare for reset,
and failed it, make sure to remove that preparation so
that the next attempted reset has a clean slate by triggering
another full prepare cycle for the engines. Note that with
successful reset, there is nothing to cleanup.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index b36a3b5736a0..68fe4c16acfb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2092,22 +2092,26 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
 			      unsigned engine_mask)
 {
 	struct intel_engine_cs *engine;
-	unsigned int tmp;
+	unsigned int tmp, ret;
 
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-		if (gen8_reset_engine_start(engine))
+	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+		if (gen8_reset_engine_start(engine)) {
+			ret = -EIO;
 			goto not_ready;
+		}
+	}
 
 	if (INTEL_GEN(dev_priv) >= 11)
-		return gen11_reset_engines(dev_priv, engine_mask);
+		ret = gen11_reset_engines(dev_priv, engine_mask);
 	else
-		return gen6_reset_engines(dev_priv, engine_mask);
+		ret = gen6_reset_engines(dev_priv, engine_mask);
 
 not_ready:
-	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-		gen8_reset_engine_cancel(engine);
+	if (ret)
+		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
+			gen8_reset_engine_cancel(engine);
 
-	return -EIO;
+	return ret;
 }
 
 typedef int (*reset_func)(struct drm_i915_private *, unsigned engine_mask);
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-05-30 15:02 [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Mika Kuoppala
@ 2018-05-30 15:02 ` Mika Kuoppala
  2018-05-30 20:19   ` Chris Wilson
  2018-05-30 15:59 ` [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Chris Wilson
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Mika Kuoppala @ 2018-05-30 15:02 UTC (permalink / raw)
  To: intel-gfx

There is a problem with kbl up to rev E0 where a heavy
memory traffic from adjacent engine(s) can cause an engine
reset to fail. This traffic can be from normal memory accesses
or it can be from heavy polling on a semaphore wait.

To combat the normal traffic, we do our best to idle the adjacent
engines, before we ask the engine to prepare for reset. For per
engine reset, this will add an unwanted extra latency as we
do a blanket approach before every reset. In the past we have already
noticed that idling an engine before reset improves our chances
of resetting it, but this only idles the engines we are about to
reset, not the adjacent ones.

We could only take the approach of idling adjacent engines,
if the first reset fails. But in this area, it is usually best
to get it right off the bat.

For the second issue where unlimited semaphore wait poll loop
is generating the heavy memory traffic and preventing a reset,
we add a one microsecond poll interval to semaphore wait to
guarantee bandwidth for the reset preparation. The side effect
is that we make semaphore completion latencies also 1us longer.

References: VTHSD#2227190, HSDES#1604216706, BSID#0917
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h          |  1 +
 drivers/gpu/drm/i915/intel_engine_cs.c   | 30 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 +
 drivers/gpu/drm/i915/intel_uncore.c      | 25 ++++++++++++++++++++
 drivers/gpu/drm/i915/intel_workarounds.c |  9 +++++++
 5 files changed, 66 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f238b7b33cd9..3c615a865cc5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2237,6 +2237,7 @@ enum i915_power_well_id {
 #define RING_RESET_CTL(base)	_MMIO((base)+0xd0)
 #define   RESET_CTL_REQUEST_RESET  (1 << 0)
 #define   RESET_CTL_READY_TO_RESET (1 << 1)
+#define RING_SEMA_WAIT_POLL(base) _MMIO((base)+0x24c)
 
 #define HSW_GTT_CACHE_EN	_MMIO(0x4024)
 #define   GTT_CACHE_EN_ALL	0xF0007FFF
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 13448ea76f57..4d30cbb2281e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -808,6 +808,36 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
 	return err;
 }
 
+int intel_engine_start_cs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 base = engine->mmio_base;
+	const i915_reg_t mode = RING_MI_MODE(base);
+	int err;
+
+	if (INTEL_GEN(dev_priv) < 3)
+		return -ENODEV;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	I915_WRITE_FW(mode, _MASKED_BIT_DISABLE(STOP_RING));
+
+	err = 0;
+	if (__intel_wait_for_register_fw(dev_priv,
+					 mode, MODE_IDLE, 0,
+					 1000, 0,
+					 NULL)) {
+		GEM_TRACE("%s: timed out on STOP_RING -> NOT IDLE\n",
+			  engine->name);
+		err = -ETIMEDOUT;
+	}
+
+	/* A final mmio read to let GPU writes be hopefully flushed to memory */
+	POSTING_READ_FW(mode);
+
+	return err;
+}
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 {
 	switch (type) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index acef385c4c80..5e2c59128fa9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -879,6 +879,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
+int intel_engine_start_cs(struct intel_engine_cs *engine);
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 68fe4c16acfb..ffbae5c44b8c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2088,12 +2088,34 @@ static void gen8_reset_engine_cancel(struct intel_engine_cs *engine)
 		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
 }
 
+static void gen8_idle_engines(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	unsigned int tmp;
+
+	for_each_engine(engine, i915, tmp)
+		intel_engine_stop_cs(engine);
+}
+
+static void gen8_unidle_engines(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	unsigned int tmp;
+
+	for_each_engine(engine, i915, tmp)
+		intel_engine_start_cs(engine);
+}
+
 static int gen8_reset_engines(struct drm_i915_private *dev_priv,
 			      unsigned engine_mask)
 {
 	struct intel_engine_cs *engine;
 	unsigned int tmp, ret;
 
+	/* WaKBLVECSSemaphoreWaitPoll:kbl */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0))
+		gen8_idle_engines(dev_priv);
+
 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
 		if (gen8_reset_engine_start(engine)) {
 			ret = -EIO;
@@ -2111,6 +2133,9 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
 		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
 			gen8_reset_engine_cancel(engine);
 
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0))
+		gen8_unidle_engines(dev_priv);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index b1ab56a1ec31..5655d39c65cb 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -666,6 +666,15 @@ static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
 		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
 		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaKBLVECSSemaphoreWaitPoll:kbl */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
+		struct intel_engine_cs *engine;
+		unsigned int tmp;
+
+		for_each_engine(engine, dev_priv, tmp)
+			I915_WRITE(RING_SEMA_WAIT_POLL(engine->mmio_base), 1);
+	}
 }
 
 static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets
  2018-05-30 15:02 [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Mika Kuoppala
  2018-05-30 15:02 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
@ 2018-05-30 15:59 ` Chris Wilson
  2018-05-30 17:29 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] " Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-05-30 15:59 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-05-30 16:02:05)
> Our reset handling has a retry layer further up in the
> chain. As we have told the engine to prepare for reset,
> and failed it, make sure to remove that preparation so
> that the next attempted reset has a clean slate by triggering
> another full prepare cycle for the engines. Note that with
> successful reset, there is nothing to cleanup.
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 20 ++++++++++++--------
>  1 file changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index b36a3b5736a0..68fe4c16acfb 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -2092,22 +2092,26 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
>                               unsigned engine_mask)
>  {
>         struct intel_engine_cs *engine;
> -       unsigned int tmp;
> +       unsigned int tmp, ret;

int ret; Might as well keep the types clean.

> -       for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
> -               if (gen8_reset_engine_start(engine))
> +       for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
> +               if (gen8_reset_engine_start(engine)) {
> +                       ret = -EIO;
>                         goto not_ready;
> +               }
> +       }
>  
>         if (INTEL_GEN(dev_priv) >= 11)
> -               return gen11_reset_engines(dev_priv, engine_mask);
> +               ret = gen11_reset_engines(dev_priv, engine_mask);
>         else
> -               return gen6_reset_engines(dev_priv, engine_mask);
> +               ret = gen6_reset_engines(dev_priv, engine_mask);
>  
>  not_ready:
> -       for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
> -               gen8_reset_engine_cancel(engine);
> +       if (ret)

Or we just always clear the bit here. On the success path, it will be
just a nop. On the not_ready path we're already clearing untouched
engines, so might as well go whole hog and overkill everything?

Might as well throw in a
References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915: Cancel reset preparations on failed resets
  2018-05-30 15:02 [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Mika Kuoppala
  2018-05-30 15:02 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
  2018-05-30 15:59 ` [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Chris Wilson
@ 2018-05-30 17:29 ` Patchwork
  2018-05-30 17:48 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-05-30 18:37 ` ✓ Fi.CI.IGT: " Patchwork
  4 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-05-30 17:29 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Cancel reset preparations on failed resets
URL   : https://patchwork.freedesktop.org/series/43957/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
7d9f9247a4bb drm/i915: Cancel reset preparations on failed resets
9ab18769303b drm/i915: Add WaKBLVECSSemaphoreWaitPoll
-:40: CHECK:SPACING: spaces preferred around that '+' (ctx:VxV)
#40: FILE: drivers/gpu/drm/i915/i915_reg.h:2240:
+#define RING_SEMA_WAIT_POLL(base) _MMIO((base)+0x24c)
                                               ^

total: 0 errors, 0 warnings, 1 checks, 108 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Cancel reset preparations on failed resets
  2018-05-30 15:02 [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Mika Kuoppala
                   ` (2 preceding siblings ...)
  2018-05-30 17:29 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] " Patchwork
@ 2018-05-30 17:48 ` Patchwork
  2018-05-30 18:37 ` ✓ Fi.CI.IGT: " Patchwork
  4 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-05-30 17:48 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Cancel reset preparations on failed resets
URL   : https://patchwork.freedesktop.org/series/43957/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4257 -> Patchwork_9152 =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9152 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9152, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43957/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9152:

  === IGT changes ===

    ==== Warnings ====

    igt@kms_flip@basic-flip-vs-dpms:
      fi-glk-j4005:       PASS -> SKIP

    
== Known issues ==

  Here are the changes found in Patchwork_9152 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_flip@basic-flip-vs-modeset:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106000)

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-glk-j4005:       PASS -> FAIL (fdo#100368)

    igt@kms_frontbuffer_tracking@basic:
      fi-hsw-4200u:       PASS -> DMESG-FAIL (fdo#102614, fdo#106103)
      fi-hsw-peppy:       PASS -> DMESG-FAIL (fdo#102614, fdo#106103)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
      fi-cnl-psr:         PASS -> DMESG-WARN (fdo#104951)

    igt@prime_vgem@basic-fence-flip:
      fi-glk-j4005:       PASS -> FAIL (fdo#104008)

    
    ==== Possible fixes ====

    igt@gem_mmap_gtt@basic-small-bo-tiledx:
      fi-gdg-551:         FAIL (fdo#102575) -> PASS

    igt@kms_flip@basic-flip-vs-wf_vblank:
      fi-cnl-psr:         FAIL (fdo#100368) -> PASS

    igt@kms_frontbuffer_tracking@basic:
      fi-bsw-n3050:       INCOMPLETE (fdo#106729) -> PASS

    igt@kms_pipe_crc_basic@nonblocking-crc-pipe-c:
      fi-skl-guc:         FAIL (fdo#104724, fdo#103191) -> PASS

    igt@kms_pipe_crc_basic@read-crc-pipe-c:
      fi-glk-j4005:       DMESG-WARN (fdo#106097, fdo#106000) -> PASS

    igt@prime_vgem@basic-fence-flip:
      fi-ilk-650:         FAIL (fdo#104008) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951
  fdo#106000 https://bugs.freedesktop.org/show_bug.cgi?id=106000
  fdo#106097 https://bugs.freedesktop.org/show_bug.cgi?id=106097
  fdo#106103 https://bugs.freedesktop.org/show_bug.cgi?id=106103
  fdo#106729 https://bugs.freedesktop.org/show_bug.cgi?id=106729


== Participating hosts (45 -> 39) ==

  Missing    (6): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-cfl-u2 fi-skl-6700hq 


== Build changes ==

    * Linux: CI_DRM_4257 -> Patchwork_9152

  CI_DRM_4257: 8aac35d26057479982a346c0e9cd57c2e930b7e1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4501: 6796a604bab6df9c84af149e799902360afdd157 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9152: 9ab18769303bacc835b8a6973e04ea77880e0cca @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9ab18769303b drm/i915: Add WaKBLVECSSemaphoreWaitPoll
7d9f9247a4bb drm/i915: Cancel reset preparations on failed resets

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9152/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/2] drm/i915: Cancel reset preparations on failed resets
  2018-05-30 15:02 [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Mika Kuoppala
                   ` (3 preceding siblings ...)
  2018-05-30 17:48 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-05-30 18:37 ` Patchwork
  4 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-05-30 18:37 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Cancel reset preparations on failed resets
URL   : https://patchwork.freedesktop.org/series/43957/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4257_full -> Patchwork_9152_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9152_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9152_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/43957/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9152_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_schedule@deep-bsd1:
      shard-kbl:          PASS -> SKIP

    igt@pm_rc6_residency@rc6-accuracy:
      shard-snb:          SKIP -> PASS

    
== Known issues ==

  Here are the changes found in Patchwork_9152_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_gtt:
      shard-glk:          PASS -> INCOMPLETE (k.org#198133, fdo#103359)

    igt@kms_flip@basic-flip-vs-wf_vblank:
      shard-hsw:          PASS -> FAIL (fdo#103928)

    igt@kms_flip@plain-flip-ts-check-interruptible:
      shard-hsw:          PASS -> FAIL (fdo#100368)

    igt@kms_frontbuffer_tracking@fbc-suspend:
      shard-glk:          PASS -> DMESG-WARN (fdo#106728)

    igt@kms_rotation_crc@cursor-rotation-180:
      shard-hsw:          PASS -> FAIL (fdo#103925, fdo#104724)

    igt@pm_rpm@drm-resources-equal:
      shard-glk:          PASS -> FAIL (fdo#106539) +1

    
    ==== Possible fixes ====

    igt@drv_selftest@live_hangcheck:
      shard-kbl:          DMESG-FAIL (fdo#106560) -> PASS

    igt@gem_exec_parallel@render-fds:
      shard-snb:          INCOMPLETE (fdo#105411) -> PASS

    igt@kms_flip@flip-vs-expired-vblank-interruptible:
      shard-glk:          FAIL (fdo#105363) -> PASS

    igt@kms_flip@plain-flip-ts-check:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_flip_tiling@flip-to-x-tiled:
      shard-glk:          FAIL (fdo#104724) -> PASS

    igt@perf@blocking:
      shard-hsw:          FAIL (fdo#102252) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
  fdo#103928 https://bugs.freedesktop.org/show_bug.cgi?id=103928
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#106539 https://bugs.freedesktop.org/show_bug.cgi?id=106539
  fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560
  fdo#106728 https://bugs.freedesktop.org/show_bug.cgi?id=106728
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4257 -> Patchwork_9152

  CI_DRM_4257: 8aac35d26057479982a346c0e9cd57c2e930b7e1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4501: 6796a604bab6df9c84af149e799902360afdd157 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9152: 9ab18769303bacc835b8a6973e04ea77880e0cca @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9152/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-05-30 15:02 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
@ 2018-05-30 20:19   ` Chris Wilson
  2018-06-05 14:54     ` Mika Kuoppala
  2018-06-05 16:01     ` Mika Kuoppala
  0 siblings, 2 replies; 16+ messages in thread
From: Chris Wilson @ 2018-05-30 20:19 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-05-30 16:02:06)
> There is a problem with kbl up to rev E0 where a heavy
> memory traffic from adjacent engine(s) can cause an engine
> reset to fail. This traffic can be from normal memory accesses
> or it can be from heavy polling on a semaphore wait.
> 
> To combat the normal traffic, we do our best to idle the adjacent
> engines, before we ask the engine to prepare for reset. For per
> engine reset, this will add an unwanted extra latency as we
> do blanket approach before every reset. In past already have
> noticed that idling an engine before reset, improves our chances
> of resetting it, but this only idles the engines we are about to
> reset, not the adjancent ones.

Unfortunately we don't have a lock on the other engines, so can't
prevent two resets running in parallel clobbering state on the other.

So what's stopping the failure mode of falling back to resetting all
engines at once if resetting one fails? Is it a catastrophic failure?
 
> We could only take the approach of idling adjacent engines,
> if the first reset fails. But in this area, it is usually best
> to get it right off the bat.
> 
> For the second issue where unlimited semaphore wait poll loop
> is generating the heavy memory traffic and preventing a reset,
> we add one microsecond poll interval to semaphore wait to
> guarantee bandwidth for the reset preration. The side effect
> is that we make semaphore completion latencies also 1us longer.

You know the rule: second issue, second patch. That's odd, I would have
expected a MI_SEMA op to be an arbitration point (even inside the busy
wait loop), so would have expected it to behave nicely with STOP_RING.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-05-30 20:19   ` Chris Wilson
@ 2018-06-05 14:54     ` Mika Kuoppala
  2018-06-05 16:01     ` Mika Kuoppala
  1 sibling, 0 replies; 16+ messages in thread
From: Mika Kuoppala @ 2018-06-05 14:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-05-30 16:02:06)
>> There is a problem with kbl up to rev E0 where a heavy
>> memory traffic from adjacent engine(s) can cause an engine
>> reset to fail. This traffic can be from normal memory accesses
>> or it can be from heavy polling on a semaphore wait.
>> 
>> To combat the normal traffic, we do our best to idle the adjacent
>> engines, before we ask the engine to prepare for reset. For per
>> engine reset, this will add an unwanted extra latency as we
>> do blanket approach before every reset. In past already have
>> noticed that idling an engine before reset, improves our chances
>> of resetting it, but this only idles the engines we are about to
>> reset, not the adjancent ones.
>
> Unfortunately we don't have a lock on the other engines, so can't
> prevent two resets running in parallel clobbering state on the other.
>
> So what's stopping the failure mode of falling back to resetting all
> engines at once if resetting one fails? Is it a catastrophic failure?

Nothing that I can think of, and for this we should just let the full
reset and its explicit idling be our backup plan.

And if it ever shows itself as frequent enough to warrant further
work, we should consider doing a full reset right from the start on
the affected hw.

-Mika
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-05-30 20:19   ` Chris Wilson
  2018-06-05 14:54     ` Mika Kuoppala
@ 2018-06-05 16:01     ` Mika Kuoppala
  1 sibling, 0 replies; 16+ messages in thread
From: Mika Kuoppala @ 2018-06-05 16:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-05-30 16:02:06)
>> 
>> For the second issue where unlimited semaphore wait poll loop
>> is generating the heavy memory traffic and preventing a reset,
>> we add one microsecond poll interval to semaphore wait to
>> guarantee bandwidth for the reset preration. The side effect
>> is that we make semaphore completion latencies also 1us longer.
>
> You know the rule: second issue, second patch. That's odd, I would have
> expected a MI_SEMA op to be an arbitration point (even inside the busy
> wait loop), so would have expected it to behave nicely with STOP_RING.

My interpretation was that the context save gets delayed/stuck.

-Mika

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-06-07  8:54   ` Joonas Lahtinen
@ 2018-06-08  9:31     ` Mika Kuoppala
  0 siblings, 0 replies; 16+ messages in thread
From: Mika Kuoppala @ 2018-06-08  9:31 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx

Joonas Lahtinen <joonas.lahtinen@linux.intel.com> writes:

> Quoting Mika Kuoppala (2018-06-05 19:03:57)
>> There is a problem with kbl up to rev E0 where a heavy
>> memory/fabric traffic from adjacent engine(s) can cause an engine
>> reset to fail. This traffic can be from normal memory accesses
>> or it can be from heavy polling on a semaphore wait.
>> 
>> For engine hogging causing a fail, we already fallback to
>> full reset. Which effectively stops all engines and thus
>> we only add a workaround documentation.
>> 
>> For the semaphore wait loop poll case, we add one microsecond
>> poll interval to semaphore wait to guarantee bandwidth for
>> the reset preration. The side effect is that we make semaphore
>> completion latencies also 1us longer.
>> 
>> v2: Let full reset handle the adjacent engine idling (Chris)
>> 
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
>> References: VTHSD#2227190, HSDES#1604216706, BSID#0917
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>
> Skip the RCS engine and this is;
>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

RCS engine skipped on v2, and both patches pushed.
Thank you both for review.
-Mika
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-06-05 16:03 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
  2018-06-05 16:12   ` Chris Wilson
  2018-06-07  8:54   ` Joonas Lahtinen
@ 2018-06-07 17:24   ` Mika Kuoppala
  2 siblings, 0 replies; 16+ messages in thread
From: Mika Kuoppala @ 2018-06-07 17:24 UTC (permalink / raw)
  To: intel-gfx

There is a problem with kbl up to rev E0 where a heavy
memory/fabric traffic from adjacent engine(s) can cause an engine
reset to fail. This traffic can be from normal memory accesses
or it can be from heavy polling on a semaphore wait.

For engine hogging causing a failure, we already fall back to a
full reset, which effectively stops all engines, and thus
we only add workaround documentation.

For the semaphore wait loop poll case, we add one microsecond
poll interval to semaphore wait to guarantee bandwidth for
the reset preparation. The side effect is that we make semaphore
completion latencies also 1us longer.

v2: Let full reset handle the adjacent engine idling (Chris)
v3: Skip render engine (Joonas), please checkpatch on define (Mika)

References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
References: VTHSD#2227190, HSDES#1604216706, BSID#0917
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h          |  1 +
 drivers/gpu/drm/i915/intel_uncore.c      |  2 ++
 drivers/gpu/drm/i915/intel_workarounds.c | 13 +++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f0317bde3aab..987def26ce82 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2242,6 +2242,7 @@ enum i915_power_well_id {
 #define RING_RESET_CTL(base)	_MMIO((base)+0xd0)
 #define   RESET_CTL_REQUEST_RESET  (1 << 0)
 #define   RESET_CTL_READY_TO_RESET (1 << 1)
+#define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
 
 #define HSW_GTT_CACHE_EN	_MMIO(0x4024)
 #define   GTT_CACHE_EN_ALL	0xF0007FFF
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index bb03f6d8b3d1..b892ca8396e8 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2174,6 +2174,8 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 		 * Thus assume it is best to stop engines on all gens
 		 * where we have a gpu reset.
 		 *
+		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+		 *
 		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
 		 *
 		 * FIXME: Wa for more modern gens needs to be validated
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index b1ab56a1ec31..24b929ce3341 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -666,6 +666,19 @@ static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
 		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
 		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaKBLVECSSemaphoreWaitPoll:kbl */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
+		struct intel_engine_cs *engine;
+		unsigned int tmp;
+
+		for_each_engine(engine, dev_priv, tmp) {
+			if (engine->id == RCS)
+				continue;
+
+			I915_WRITE(RING_SEMA_WAIT_POLL(engine->mmio_base), 1);
+		}
+	}
 }
 
 static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-06-05 16:03 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
  2018-06-05 16:12   ` Chris Wilson
@ 2018-06-07  8:54   ` Joonas Lahtinen
  2018-06-08  9:31     ` Mika Kuoppala
  2018-06-07 17:24   ` Mika Kuoppala
  2 siblings, 1 reply; 16+ messages in thread
From: Joonas Lahtinen @ 2018-06-07  8:54 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-06-05 19:03:57)
> There is a problem with kbl up to rev E0 where a heavy
> memory/fabric traffic from adjacent engine(s) can cause an engine
> reset to fail. This traffic can be from normal memory accesses
> or it can be from heavy polling on a semaphore wait.
> 
> For engine hogging causing a fail, we already fallback to
> full reset. Which effectively stops all engines and thus
> we only add a workaround documentation.
> 
> For the semaphore wait loop poll case, we add one microsecond
> poll interval to semaphore wait to guarantee bandwidth for
> the reset preparation. The side effect is that we make semaphore
> completion latencies also 1us longer.
> 
> v2: Let full reset handle the adjacent engine idling (Chris)
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
> References: VTHSD#2227190, HSDES#1604216706, BSID#0917
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

Skip the RCS engine and this is;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-06-06  8:40     ` Mika Kuoppala
@ 2018-06-06  8:47       ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-06-06  8:47 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-06-06 09:40:11)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Quoting Mika Kuoppala (2018-06-05 17:03:57)
> >> There is a problem with kbl up to rev E0 where a heavy
> >> memory/fabric traffic from adjacent engine(s) can cause an engine
> >> reset to fail. This traffic can be from normal memory accesses
> >> or it can be from heavy polling on a semaphore wait.
> >> 
> >> For engine hogging causing a fail, we already fallback to
> >> full reset. Which effectively stops all engines and thus
> >> we only add a workaround documentation.
> >> 
> >> For the semaphore wait loop poll case, we add one microsecond
> >> poll interval to semaphore wait to guarantee bandwidth for
> >> the reset preparation. The side effect is that we make semaphore
> >> completion latencies also 1us longer.
> >> 
> >> v2: Let full reset handle the adjacent engine idling (Chris)
> >> 
> >> References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
> >> References: VTHSD#2227190, HSDES#1604216706, BSID#0917
> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> >> ---
> >> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> >> index b1ab56a1ec31..5655d39c65cb 100644
> >> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> >> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> >> @@ -666,6 +666,15 @@ static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> >>         I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> >>                    I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> >>                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> >> +
> >> +       /* WaKBLVECSSemaphoreWaitPoll:kbl */
> >> +       if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
> >
> > Hmm, what revision was production? Just checking we need to ship this
> > w/a...
> 
> The bspec list of revs seems outdated so can't trust that blindly
> but already 0x1 is not preprod on that list. Also found nuc in lab
> which is 0x02.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>

Care to update intel_detect_preproduction_hw() ?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-06-05 16:12   ` Chris Wilson
@ 2018-06-06  8:40     ` Mika Kuoppala
  2018-06-06  8:47       ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Mika Kuoppala @ 2018-06-06  8:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-06-05 17:03:57)
>> There is a problem with kbl up to rev E0 where a heavy
>> memory/fabric traffic from adjacent engine(s) can cause an engine
>> reset to fail. This traffic can be from normal memory accesses
>> or it can be from heavy polling on a semaphore wait.
>> 
>> For engine hogging causing a fail, we already fallback to
>> full reset. Which effectively stops all engines and thus
>> we only add a workaround documentation.
>> 
>> For the semaphore wait loop poll case, we add one microsecond
>> poll interval to semaphore wait to guarantee bandwidth for
>> the reset preparation. The side effect is that we make semaphore
>> completion latencies also 1us longer.
>> 
>> v2: Let full reset handle the adjacent engine idling (Chris)
>> 
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
>> References: VTHSD#2227190, HSDES#1604216706, BSID#0917
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
>> index b1ab56a1ec31..5655d39c65cb 100644
>> --- a/drivers/gpu/drm/i915/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
>> @@ -666,6 +666,15 @@ static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>>         I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>>                    I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>>                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>> +
>> +       /* WaKBLVECSSemaphoreWaitPoll:kbl */
>> +       if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
>
> Hmm, what revision was production? Just checking we need to ship this
> w/a...

The bspec list of revs seems outdated so can't trust that blindly
but already 0x1 is not preprod on that list. Also found nuc in lab
which is 0x02.

-Mika


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-06-05 16:03 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
@ 2018-06-05 16:12   ` Chris Wilson
  2018-06-06  8:40     ` Mika Kuoppala
  2018-06-07  8:54   ` Joonas Lahtinen
  2018-06-07 17:24   ` Mika Kuoppala
  2 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2018-06-05 16:12 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-06-05 17:03:57)
> There is a problem with kbl up to rev E0 where a heavy
> memory/fabric traffic from adjacent engine(s) can cause an engine
> reset to fail. This traffic can be from normal memory accesses
> or it can be from heavy polling on a semaphore wait.
> 
> For engine hogging causing a fail, we already fallback to
> full reset. Which effectively stops all engines and thus
> we only add a workaround documentation.
> 
> For the semaphore wait loop poll case, we add one microsecond
> poll interval to semaphore wait to guarantee bandwidth for
> the reset preparation. The side effect is that we make semaphore
> completion latencies also 1us longer.
> 
> v2: Let full reset handle the adjacent engine idling (Chris)
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
> References: VTHSD#2227190, HSDES#1604216706, BSID#0917
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index b1ab56a1ec31..5655d39c65cb 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -666,6 +666,15 @@ static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
>         I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>                    I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> +
> +       /* WaKBLVECSSemaphoreWaitPoll:kbl */
> +       if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {

Hmm, what revision was production? Just checking we need to ship this
w/a...
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll
  2018-06-05 16:03 [PATCH 1/2] " Mika Kuoppala
@ 2018-06-05 16:03 ` Mika Kuoppala
  2018-06-05 16:12   ` Chris Wilson
                     ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Mika Kuoppala @ 2018-06-05 16:03 UTC (permalink / raw)
  To: intel-gfx

There is a problem with kbl up to rev E0 where a heavy
memory/fabric traffic from adjacent engine(s) can cause an engine
reset to fail. This traffic can be from normal memory accesses
or it can be from heavy polling on a semaphore wait.

For engine hogging causing a fail, we already fallback to
full reset. Which effectively stops all engines and thus
we only add a workaround documentation.

For the semaphore wait loop poll case, we add one microsecond
poll interval to semaphore wait to guarantee bandwidth for
the reset preparation. The side effect is that we make semaphore
completion latencies also 1us longer.

v2: Let full reset handle the adjacent engine idling (Chris)

References: https://bugs.freedesktop.org/show_bug.cgi?id=106684
References: VTHSD#2227190, HSDES#1604216706, BSID#0917
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h          | 1 +
 drivers/gpu/drm/i915/intel_uncore.c      | 2 ++
 drivers/gpu/drm/i915/intel_workarounds.c | 9 +++++++++
 3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f0317bde3aab..0e8c7896cd74 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2242,6 +2242,7 @@ enum i915_power_well_id {
 #define RING_RESET_CTL(base)	_MMIO((base)+0xd0)
 #define   RESET_CTL_REQUEST_RESET  (1 << 0)
 #define   RESET_CTL_READY_TO_RESET (1 << 1)
+#define RING_SEMA_WAIT_POLL(base) _MMIO((base)+0x24c)
 
 #define HSW_GTT_CACHE_EN	_MMIO(0x4024)
 #define   GTT_CACHE_EN_ALL	0xF0007FFF
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index bb03f6d8b3d1..b892ca8396e8 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2174,6 +2174,8 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 		 * Thus assume it is best to stop engines on all gens
 		 * where we have a gpu reset.
 		 *
+		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+		 *
 		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
 		 *
 		 * FIXME: Wa for more modern gens needs to be validated
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index b1ab56a1ec31..5655d39c65cb 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -666,6 +666,15 @@ static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
 		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
 		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaKBLVECSSemaphoreWaitPoll:kbl */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
+		struct intel_engine_cs *engine;
+		unsigned int tmp;
+
+		for_each_engine(engine, dev_priv, tmp)
+			I915_WRITE(RING_SEMA_WAIT_POLL(engine->mmio_base), 1);
+	}
 }
 
 static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-06-08  9:31 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-30 15:02 [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Mika Kuoppala
2018-05-30 15:02 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
2018-05-30 20:19   ` Chris Wilson
2018-06-05 14:54     ` Mika Kuoppala
2018-06-05 16:01     ` Mika Kuoppala
2018-05-30 15:59 ` [PATCH 1/2] drm/i915: Cancel reset preparations on failed resets Chris Wilson
2018-05-30 17:29 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] " Patchwork
2018-05-30 17:48 ` ✓ Fi.CI.BAT: success " Patchwork
2018-05-30 18:37 ` ✓ Fi.CI.IGT: " Patchwork
2018-06-05 16:03 [PATCH 1/2] " Mika Kuoppala
2018-06-05 16:03 ` [PATCH 2/2] drm/i915: Add WaKBLVECSSemaphoreWaitPoll Mika Kuoppala
2018-06-05 16:12   ` Chris Wilson
2018-06-06  8:40     ` Mika Kuoppala
2018-06-06  8:47       ` Chris Wilson
2018-06-07  8:54   ` Joonas Lahtinen
2018-06-08  9:31     ` Mika Kuoppala
2018-06-07 17:24   ` Mika Kuoppala

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.