All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Stop engines around GPU reset preparations
@ 2018-03-02 10:26 Chris Wilson
  2018-03-02 11:42 ` ✓ Fi.CI.BAT: success for " Patchwork
  2018-03-02 13:27 ` ✗ Fi.CI.IGT: failure " Patchwork
  0 siblings, 2 replies; 4+ messages in thread
From: Chris Wilson @ 2018-03-02 10:26 UTC (permalink / raw)
  To: intel-gfx

As we make preparations to reset the GPU state, we assume that the GPU
is hung and will not advance. Make this assumption more explicit by
setting the STOP_RING bit on the engines as part of our early reset
preparations.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
---
See https://intel-gfx-ci.01.org/tree/drm-tip/kasan_15/fi-bdw-5557u/pstore22-1519879816_Panic_3.log
for a bizarre error that kasan-farm keeps on tripping over. Maybe related
to this?
---
 drivers/gpu/drm/i915/i915_drv.c     |  3 +++
 drivers/gpu/drm/i915/i915_drv.h     | 10 ++++++++--
 drivers/gpu/drm/i915/intel_uncore.c | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index aaa861b51024..925f5722d077 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1908,6 +1908,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
 	error->reset_count++;
 
 	disable_irq(i915->drm.irq);
+	intel_gpu_reset_prepare(i915, ALL_ENGINES);
+
 	ret = i915_gem_reset_prepare(i915);
 	if (ret) {
 		dev_err(i915->drm.dev, "GPU recovery failed\n");
@@ -1969,6 +1971,7 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
 
 finish:
 	i915_gem_reset_finish(i915);
+	intel_gpu_reset_finish(i915, ALL_ENGINES);
 	enable_irq(i915->drm.irq);
 
 wakeup:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 10c9e5e619ab..b95e675e0834 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2957,8 +2957,14 @@ extern const struct dev_pm_ops i915_pm_ops;
 extern int i915_driver_load(struct pci_dev *pdev,
 			    const struct pci_device_id *ent);
 extern void i915_driver_unload(struct drm_device *dev);
-extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
-extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
+
+bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
+
+void intel_gpu_reset_prepare(struct drm_i915_private *dev_priv,
+			     unsigned engine_mask);
+int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
+void intel_gpu_reset_finish(struct drm_i915_private *dev_priv,
+			    unsigned engine_mask);
 
 #define I915_RESET_QUIET BIT(0)
 extern void i915_reset(struct drm_i915_private *i915, unsigned int flags);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 5ae9a62712ca..7186fe4d2ba9 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1899,6 +1899,29 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
 		return NULL;
 }
 
+static void i915_engines_set_mode(struct drm_i915_private *dev_priv,
+				  unsigned engine_mask,
+				  u32 mode)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (INTEL_GEN(dev_priv) < 3)
+		return;
+
+	for_each_engine_masked(engine, dev_priv, engine_mask, id)
+		I915_WRITE_FW(RING_MI_MODE(engine->mmio_base), mode);
+}
+
+void intel_gpu_reset_prepare(struct drm_i915_private *dev_priv,
+			     unsigned engine_mask)
+{
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	i915_engines_set_mode(dev_priv, engine_mask,
+			      _MASKED_BIT_ENABLE(STOP_RING));
+}
+
 int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 {
 	reset_func reset = intel_get_gpu_reset(dev_priv);
@@ -1939,6 +1962,16 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 	return ret;
 }
 
+void intel_gpu_reset_finish(struct drm_i915_private *dev_priv,
+			    unsigned engine_mask)
+{
+	/* Clear the STOP_RING bit as the reset may not have occurred */
+	i915_engines_set_mode(dev_priv, engine_mask,
+			      _MASKED_BIT_DISABLE(STOP_RING));
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
 bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
 {
 	return intel_get_gpu_reset(dev_priv) != NULL;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Stop engines around GPU reset preparations
  2018-03-02 10:26 [PATCH] drm/i915: Stop engines around GPU reset preparations Chris Wilson
@ 2018-03-02 11:42 ` Patchwork
  2018-03-02 13:27 ` ✗ Fi.CI.IGT: failure " Patchwork
  1 sibling, 0 replies; 4+ messages in thread
From: Patchwork @ 2018-03-02 11:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Stop engines around GPU reset preparations
URL   : https://patchwork.freedesktop.org/series/39261/
State : success

== Summary ==

Series 39261v1 drm/i915: Stop engines around GPU reset preparations
https://patchwork.freedesktop.org/api/1.0/series/39261/revisions/1/mbox/

---- Known issues:

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713
Test prime_vgem:
        Subgroup basic-fence-flip:
                fail       -> PASS       (fi-byt-n2820) fdo#104008

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:415s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:420s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:370s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:480s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:277s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:478s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:482s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:465s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:456s
fi-cfl-8700k     total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:390s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:561s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:414s
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:289s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:506s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:384s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:407s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:451s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:414s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:453s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:493s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:449s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:494s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:583s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:424s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:502s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:516s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:489s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:479s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:409s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:429s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:524s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:390s
Blacklisted hosts:
fi-cfl-u         total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:496s

b2e10fd5e8b2cd72b0e1eba46c1221dc3d4b70bc drm-tip: 2018y-03m-02d-09h-36m-59s UTC integration manifest
8e51b8d3db3c drm/i915: Stop engines around GPU reset preparations

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8209/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915: Stop engines around GPU reset preparations
  2018-03-02 10:26 [PATCH] drm/i915: Stop engines around GPU reset preparations Chris Wilson
  2018-03-02 11:42 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-03-02 13:27 ` Patchwork
  1 sibling, 0 replies; 4+ messages in thread
From: Patchwork @ 2018-03-02 13:27 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Stop engines around GPU reset preparations
URL   : https://patchwork.freedesktop.org/series/39261/
State : failure

== Summary ==

---- Possible new issues:

Test drv_selftest:
        Subgroup live_hangcheck:
                pass       -> INCOMPLETE (shard-apl)
Test gem_eio:
        Subgroup in-flight-internal:
                pass       -> INCOMPLETE (shard-apl)
Test gem_exec_capture:
        Subgroup capture-vebox:
                pass       -> INCOMPLETE (shard-apl)
Test kms_cursor_crc:
        Subgroup cursor-128x128-suspend:
                skip       -> PASS       (shard-snb)

---- Known issues:

Test gem_eio:
        Subgroup in-flight-external:
                pass       -> INCOMPLETE (shard-apl) fdo#104945 +1
Test kms_chv_cursor_fail:
        Subgroup pipe-b-256x256-bottom-edge:
                dmesg-warn -> PASS       (shard-snb) fdo#105185 +2
Test kms_cursor_crc:
        Subgroup cursor-64x64-suspend:
                pass       -> INCOMPLETE (shard-hsw) fdo#103540

fdo#104945 https://bugs.freedesktop.org/show_bug.cgi?id=104945
fdo#105185 https://bugs.freedesktop.org/show_bug.cgi?id=105185
fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540

shard-apl        total:3190 pass:1654 dwarn:1   dfail:0   fail:7   skip:1522 time:10062s
shard-hsw        total:3422 pass:1749 dwarn:1   dfail:0   fail:1   skip:1669 time:11471s
shard-snb        total:3463 pass:1361 dwarn:2   dfail:0   fail:1   skip:2099 time:7009s
Blacklisted hosts:
shard-kbl        total:3190 pass:1765 dwarn:10  dfail:0   fail:6   skip:1403 time:8063s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8209/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] drm/i915: Stop engines around GPU reset preparations
@ 2018-03-03  9:08 Chris Wilson
  0 siblings, 0 replies; 4+ messages in thread
From: Chris Wilson @ 2018-03-03  9:08 UTC (permalink / raw)
  To: intel-gfx

As we make preparations to reset the GPU state, we assume that the GPU
is hung and will not advance. Make this assumption more explicit by
setting the STOP_RING bit on the engines as part of our early reset
preparations.

v2: Move intel_gpu_reset_(prepare|finish) to
i915_gem_reset_(prepare|finish)_engine respectively. When the CS hits
the STOP_RING it emits an arbitration event, causing a context switch
between execlists and associated interrupts. Before we can allow that to
happen, we have to disable the execlists interrupt processing so that we
do not try to handle the partial completion (i.e. seeing the completed
event before the request has been signaled).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> #v1
Link: https://patchwork.freedesktop.org/patch/msgid/20180302113324.23189-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.h     | 11 +++++++++--
 drivers/gpu/drm/i915/i915_gem.c     |  4 ++++
 drivers/gpu/drm/i915/intel_uncore.c | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 10c9e5e619ab..0cb141f768ed 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2957,8 +2957,15 @@ extern const struct dev_pm_ops i915_pm_ops;
 extern int i915_driver_load(struct pci_dev *pdev,
 			    const struct pci_device_id *ent);
 extern void i915_driver_unload(struct drm_device *dev);
-extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
-extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
+
+bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
+
+void intel_gpu_reset_prepare(struct drm_i915_private *dev_priv,
+			     unsigned int engine_mask);
+int intel_gpu_reset(struct drm_i915_private *dev_priv,
+		    unsigned int engine_mask);
+void intel_gpu_reset_finish(struct drm_i915_private *dev_priv,
+			    unsigned int engine_mask);
 
 #define I915_RESET_QUIET BIT(0)
 extern void i915_reset(struct drm_i915_private *i915, unsigned int flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a5bd07338b46..3f063967a6e0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2953,6 +2953,8 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	if (engine->i915->guc.preempt_wq)
 		flush_workqueue(engine->i915->guc.preempt_wq);
 
+	intel_gpu_reset_prepare(engine->i915, BIT(engine->id));
+
 	if (engine->irq_seqno_barrier)
 		engine->irq_seqno_barrier(engine);
 
@@ -3154,6 +3156,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
 {
+	intel_gpu_reset_finish(engine->i915, BIT(engine->id));
+
 	tasklet_enable(&engine->execlists.tasklet);
 	kthread_unpark(engine->breadcrumbs.signaler);
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 5ae9a62712ca..e193af2feefb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1899,7 +1899,31 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
 		return NULL;
 }
 
-int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
+static void i915_engines_set_mode(struct drm_i915_private *dev_priv,
+				  unsigned int engine_mask,
+				  u32 mode)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (INTEL_GEN(dev_priv) < 3)
+		return;
+
+	for_each_engine_masked(engine, dev_priv, engine_mask, id)
+		I915_WRITE_FW(RING_MI_MODE(engine->mmio_base), mode);
+}
+
+void intel_gpu_reset_prepare(struct drm_i915_private *dev_priv,
+			     unsigned int engine_mask)
+{
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	i915_engines_set_mode(dev_priv, engine_mask,
+			      _MASKED_BIT_ENABLE(STOP_RING));
+}
+
+int intel_gpu_reset(struct drm_i915_private *dev_priv,
+		    unsigned int engine_mask)
 {
 	reset_func reset = intel_get_gpu_reset(dev_priv);
 	int retry;
@@ -1939,6 +1963,16 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 	return ret;
 }
 
+void intel_gpu_reset_finish(struct drm_i915_private *dev_priv,
+			    unsigned int engine_mask)
+{
+	/* Clear the STOP_RING bit as the reset may not have occurred */
+	i915_engines_set_mode(dev_priv, engine_mask,
+			      _MASKED_BIT_DISABLE(STOP_RING));
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
 bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
 {
 	return intel_get_gpu_reset(dev_priv) != NULL;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-03-03  9:09 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-02 10:26 [PATCH] drm/i915: Stop engines around GPU reset preparations Chris Wilson
2018-03-02 11:42 ` ✓ Fi.CI.BAT: success for " Patchwork
2018-03-02 13:27 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-03-03  9:08 [PATCH] " Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.