All of lore.kernel.org
 help / color / mirror / Atom feed
* [CI 1/3] drm/i915: Restore current RPS state after reset
@ 2016-09-21 13:51 Chris Wilson
  2016-09-21 13:51 ` [CI 2/3] drm/i915: Only shrink the unbound objects during freeze Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Chris Wilson @ 2016-09-21 13:51 UTC (permalink / raw)
  To: intel-gfx

Following commit 821ed7df6e2a ("drm/i915: Update reset path to fix
incomplete requests") we no longer mark the context as lost on reset as
we keep the requests (and contexts) alive. However, RPS remains reset
and we need to restore the current state to match the in-flight
requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97824
Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 9 ---------
 drivers/gpu/drm/i915/i915_gem.c | 7 +++++++
 drivers/gpu/drm/i915/i915_irq.c | 6 ++++++
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 7f4e8adec8a8..8ae5853ea3c6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1786,15 +1786,6 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
-	/*
-	 * rps/rc6 re-init is necessary to restore state lost after the
-	 * reset and the re-install of gt irqs. Skip for ironlake per
-	 * previous concerns that it doesn't respond well to some forms
-	 * of re-init after reset.
-	 */
-	intel_sanitize_gt_powersave(dev_priv);
-	intel_autoenable_gt_powersave(dev_priv);
-
 wakeup:
 	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
 	return;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c8bd02277b7d..aeb46658ab3c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2628,6 +2628,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 		i915_gem_reset_engine(engine);
 
 	i915_gem_restore_fences(&dev_priv->drm);
+
+	if (dev_priv->gt.awake) {
+		intel_sanitize_gt_powersave(dev_priv);
+		intel_enable_gt_powersave(dev_priv);
+		if (INTEL_GEN(dev_priv) >= 6)
+			gen6_rps_busy(dev_priv);
+	}
 }
 
 static void nop_submit_request(struct drm_i915_gem_request *request)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a5c02f6ea6a0..f8c0beaadf30 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -350,6 +350,9 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
 
 void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
+	if (READ_ONCE(dev_priv->rps.interrupts_enabled))
+		return;
+
 	spin_lock_irq(&dev_priv->irq_lock);
 	WARN_ON_ONCE(dev_priv->rps.pm_iir);
 	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
@@ -368,6 +371,9 @@ u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask)
 
 void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
+	if (!READ_ONCE(dev_priv->rps.interrupts_enabled))
+		return;
+
 	spin_lock_irq(&dev_priv->irq_lock);
 	dev_priv->rps.interrupts_enabled = false;
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [CI 2/3] drm/i915: Only shrink the unbound objects during freeze
  2016-09-21 13:51 [CI 1/3] drm/i915: Restore current RPS state after reset Chris Wilson
@ 2016-09-21 13:51 ` Chris Wilson
  2016-09-21 13:51 ` [CI 3/3] drm/i915/execlists: Reset RING registers upon resume Chris Wilson
  2016-09-21 14:49 ` ✗ Fi.CI.BAT: failure for series starting with [CI,1/3] drm/i915: Restore current RPS state after reset Patchwork
  2 siblings, 0 replies; 4+ messages in thread
From: Chris Wilson @ 2016-09-21 13:51 UTC (permalink / raw)
  To: intel-gfx

At the point of creating the hibernation image, the runtime power management
core is disabled - and using the rpm functions triggers a warn.
i915_gem_shrink_all() tries to unbind objects, which requires device
access and so tries to hold an rpm reference, triggering a warning:

[   44.235420] ------------[ cut here ]------------
[   44.235424] WARNING: CPU: 2 PID: 2199 at drivers/gpu/drm/i915/intel_runtime_pm.c:2688 intel_runtime_pm_get_if_in_use+0xe6/0xf0
[   44.235426] WARN_ON_ONCE(ret < 0)
[   44.235445] Modules linked in: ctr ccm arc4 rt2800usb rt2x00usb rt2800lib rt2x00lib crc_ccitt mac80211 cmac cfg80211 btusb rfcomm bnep btrtl btbcm btintel bluetooth dcdbas x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic aesni_intel snd_hda_codec_hdmi aes_x86_64 lrw gf128mul snd_hda_intel glue_helper ablk_helper cryptd snd_hda_codec hid_multitouch joydev snd_hda_core binfmt_misc i2c_hid serio_raw snd_pcm acpi_pad snd_timer snd i2c_designware_platform 8250_dw nls_iso8859_1 i2c_designware_core lpc_ich mfd_core soundcore usbhid hid psmouse ahci libahci
[   44.235447] CPU: 2 PID: 2199 Comm: kworker/u8:8 Not tainted 4.8.0-rc5+ #130
[   44.235447] Hardware name: Dell Inc. XPS 13 9343/0310JH, BIOS A07 11/11/2015
[   44.235450] Workqueue: events_unbound async_run_entry_fn
[   44.235453]  0000000000000000 ffff8801b2f7fb98 ffffffff81306c2f ffff8801b2f7fbe8
[   44.235454]  0000000000000000 ffff8801b2f7fbd8 ffffffff81056c01 00000a801f50ecc0
[   44.235456]  ffff88020ce50000 ffff88020ce59b60 ffffffff81a60b5c ffffffff81414840
[   44.235456] Call Trace:
[   44.235459]  [<ffffffff81306c2f>] dump_stack+0x4d/0x6e
[   44.235461]  [<ffffffff81056c01>] __warn+0xd1/0xf0
[   44.235464]  [<ffffffff81414840>] ? i915_pm_suspend_late+0x30/0x30
[   44.235465]  [<ffffffff81056c6f>] warn_slowpath_fmt+0x4f/0x60
[   44.235468]  [<ffffffff814e73ce>] ? pm_runtime_get_if_in_use+0x6e/0xa0
[   44.235469]  [<ffffffff81433526>] intel_runtime_pm_get_if_in_use+0xe6/0xf0
[   44.235471]  [<ffffffff81458a26>] i915_gem_shrink+0x306/0x360
[   44.235473]  [<ffffffff81343fd4>] ? pci_platform_power_transition+0x24/0x90
[   44.235475]  [<ffffffff81414840>] ? i915_pm_suspend_late+0x30/0x30
[   44.235476]  [<ffffffff81458dfb>] i915_gem_shrink_all+0x1b/0x30
[   44.235478]  [<ffffffff814560b3>] i915_gem_freeze_late+0x33/0x90
[   44.235479]  [<ffffffff81414877>] i915_pm_freeze_late+0x37/0x40
[   44.235481]  [<ffffffff814e9b8e>] dpm_run_callback+0x4e/0x130
[   44.235483]  [<ffffffff814ea5db>] __device_suspend_late+0xdb/0x1f0
[   44.235484]  [<ffffffff814ea70f>] async_suspend_late+0x1f/0xa0
[   44.235486]  [<ffffffff81077557>] async_run_entry_fn+0x37/0x150
[   44.235488]  [<ffffffff8106f518>] process_one_work+0x148/0x3f0
[   44.235490]  [<ffffffff8106f8eb>] worker_thread+0x12b/0x490
[   44.235491]  [<ffffffff8106f7c0>] ? process_one_work+0x3f0/0x3f0
[   44.235492]  [<ffffffff81074d09>] kthread+0xc9/0xe0
[   44.235495]  [<ffffffff816e257f>] ret_from_fork+0x1f/0x40
[   44.235496]  [<ffffffff81074c40>] ? kthread_park+0x60/0x60
[   44.235497] ---[ end trace e438706b97c7f132 ]---

Alternatively, to actually shrink everything we have to do so slightly
earlier in the hibernation process.

To keep lockdep silent, we need to take struct_mutex for the shrinker
even though we know that we are the only user during the freeze.

Fixes: 7aab2d534e35 ("drm/i915: Shrink objects prior to hibernation")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 11 ++++++++++-
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8ae5853ea3c6..f2d0801c590c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1863,7 +1863,16 @@ static int i915_pm_resume(struct device *kdev)
 /* freeze: before creating the hibernation_image */
 static int i915_pm_freeze(struct device *kdev)
 {
-	return i915_pm_suspend(kdev);
+	int ret;
+	ret = i915_pm_suspend(kdev);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_freeze(kdev_to_i915(kdev));
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 static int i915_pm_freeze_late(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 356899543d4b..008c74bfabad 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3076,6 +3076,7 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 void i915_gem_load_init(struct drm_device *dev);
 void i915_gem_load_cleanup(struct drm_device *dev);
 void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
+int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
 void *i915_gem_object_alloc(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index aeb46658ab3c..1418c1c522cb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4593,6 +4593,19 @@ void i915_gem_load_cleanup(struct drm_device *dev)
 	rcu_barrier();
 }
 
+int i915_gem_freeze(struct drm_i915_private *dev_priv)
+{
+	intel_runtime_pm_get(dev_priv);
+
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrink_all(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+	intel_runtime_pm_put(dev_priv);
+
+	return 0;
+}
+
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj;
@@ -4616,7 +4629,8 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 	 * the objects as well.
 	 */
 
-	i915_gem_shrink_all(dev_priv);
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
 
 	for (p = phases; *p; p++) {
 		list_for_each_entry(obj, *p, global_list) {
@@ -4624,6 +4638,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 		}
 	}
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [CI 3/3] drm/i915/execlists: Reset RING registers upon resume
  2016-09-21 13:51 [CI 1/3] drm/i915: Restore current RPS state after reset Chris Wilson
  2016-09-21 13:51 ` [CI 2/3] drm/i915: Only shrink the unbound objects during freeze Chris Wilson
@ 2016-09-21 13:51 ` Chris Wilson
  2016-09-21 14:49 ` ✗ Fi.CI.BAT: failure for series starting with [CI,1/3] drm/i915: Restore current RPS state after reset Patchwork
  2 siblings, 0 replies; 4+ messages in thread
From: Chris Wilson @ 2016-09-21 13:51 UTC (permalink / raw)
  To: intel-gfx

There is a disparity between the context image saved to disk and our own
bookkeeping - that is, we presume the RING_HEAD and RING_TAIL match our
stored ce->ring->tail value. However, as we emit WA_TAIL_DWORDS into the
ring but may not tell the GPU about them, the GPU may be lagging behind
our bookkeeping. Upon hibernation we do not save stolen pages, presuming
that their contents are volatile. This means that although we start
writing into the ring at tail, the GPU starts executing from its HEAD
and there may be some garbage in between and so the GPU promptly hangs
upon resume.

Testcase: igt/gem_exec_suspend/basic-S4
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96526
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 50 +++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 251143361f31..39417b77bff2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2129,30 +2129,42 @@ error_deref_obj:
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 {
-	struct i915_gem_context *ctx = dev_priv->kernel_context;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+
+	/* Because we emit WA_TAIL_DWORDS there may be a disparity
+	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
+	 * that stored in context. As we only write new commands from
+	 * ce->ring->tail onwards, everything before that is junk. If the GPU
+	 * starts reading from its RING_HEAD from the context, it may try to
+	 * execute that junk and die.
+	 *
+	 * So to avoid that we reset the context images upon resume. For
+	 * simplicity, we just zero everything out.
+	 */
+	list_for_each_entry(ctx, &dev_priv->context_list, link) {
+		for_each_engine(engine, dev_priv) {
+			struct intel_context *ce = &ctx->engine[engine->id];
+			u32 *reg;
 
-	for_each_engine(engine, dev_priv) {
-		struct intel_context *ce = &ctx->engine[engine->id];
-		void *vaddr;
-		uint32_t *reg_state;
-
-		if (!ce->state)
-			continue;
-
-		vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
-		if (WARN_ON(IS_ERR(vaddr)))
-			continue;
+			if (!ce->state)
+				continue;
 
-		reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+			reg = i915_gem_object_pin_map(ce->state->obj,
+						      I915_MAP_WB);
+			if (WARN_ON(IS_ERR(reg)))
+				continue;
 
-		reg_state[CTX_RING_HEAD+1] = 0;
-		reg_state[CTX_RING_TAIL+1] = 0;
+			reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
+			reg[CTX_RING_HEAD+1] = 0;
+			reg[CTX_RING_TAIL+1] = 0;
 
-		ce->state->obj->dirty = true;
-		i915_gem_object_unpin_map(ce->state->obj);
+			ce->state->obj->dirty = true;
+			i915_gem_object_unpin_map(ce->state->obj);
 
-		ce->ring->head = 0;
-		ce->ring->tail = 0;
+			ce->ring->head = ce->ring->tail = 0;
+			ce->ring->last_retired_head = -1;
+			intel_ring_update_space(ce->ring);
+		}
 	}
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [CI,1/3] drm/i915: Restore current RPS state after reset
  2016-09-21 13:51 [CI 1/3] drm/i915: Restore current RPS state after reset Chris Wilson
  2016-09-21 13:51 ` [CI 2/3] drm/i915: Only shrink the unbound objects during freeze Chris Wilson
  2016-09-21 13:51 ` [CI 3/3] drm/i915/execlists: Reset RING registers upon resume Chris Wilson
@ 2016-09-21 14:49 ` Patchwork
  2 siblings, 0 replies; 4+ messages in thread
From: Patchwork @ 2016-09-21 14:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/3] drm/i915: Restore current RPS state after reset
URL   : https://patchwork.freedesktop.org/series/12741/
State : failure

== Summary ==

Series 12741v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/12741/revisions/1/mbox/

Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> DMESG-WARN (fi-skl-6770hq)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> INCOMPLETE (fi-hsw-4770k)

fi-bdw-5557u     total:244  pass:229  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:244  pass:202  dwarn:0   dfail:0   fail:0   skip:42 
fi-byt-n2820     total:244  pass:208  dwarn:0   dfail:0   fail:1   skip:35 
fi-hsw-4770k     total:196  pass:175  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650       total:244  pass:182  dwarn:0   dfail:0   fail:2   skip:60 
fi-ivb-3520m     total:244  pass:219  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 
fi-skl-6260u     total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6700k     total:244  pass:219  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq    total:244  pass:227  dwarn:2   dfail:0   fail:1   skip:14 
fi-snb-2520m     total:244  pass:208  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2564/

395eb4879cae6cc107c9f673008ae2a77bd43c66 drm-intel-nightly: 2016y-09m-21d-13h-42m-23s UTC integration manifest
d4bba6e drm/i915/execlists: Reset RING registers upon resume
120a221 drm/i915: Only shrink the unbound objects during freeze
d92d44b drm/i915: Restore current RPS state after reset

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-09-21 14:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-21 13:51 [CI 1/3] drm/i915: Restore current RPS state after reset Chris Wilson
2016-09-21 13:51 ` [CI 2/3] drm/i915: Only shrink the unbound objects during freeze Chris Wilson
2016-09-21 13:51 ` [CI 3/3] drm/i915/execlists: Reset RING registers upon resume Chris Wilson
2016-09-21 14:49 ` ✗ Fi.CI.BAT: failure for series starting with [CI,1/3] drm/i915: Restore current RPS state after reset Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.