All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare()
@ 2017-01-16  9:42 Chris Wilson
  2017-01-16  9:42 ` [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Chris Wilson @ 2017-01-16  9:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Now that we have prepare/finish routines for the GEM reset, move the
disabling of the engine->irq_tasklet into them to reduce repetition. The
device irq enable/disable is split out to ensure it is run first and
last always (even if the GPU reset fails).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 21 ++-------------------
 drivers/gpu/drm/i915/i915_gem.c |  7 +++++++
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4e5ea5898e06..bb747aeb29aa 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1728,22 +1728,6 @@ static int i915_resume_switcheroo(struct drm_device *dev)
 	return i915_drm_resume(dev);
 }
 
-static void disable_engines_irq(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/* Ensure irq handler finishes, and not run again. */
-	disable_irq(dev_priv->drm.irq);
-	for_each_engine(engine, dev_priv, id)
-		tasklet_kill(&engine->irq_tasklet);
-}
-
-static void enable_engines_irq(struct drm_i915_private *dev_priv)
-{
-	enable_irq(dev_priv->drm.irq);
-}
-
 /**
  * i915_reset - reset chip after a hang
  * @dev_priv: device private to reset
@@ -1776,12 +1760,10 @@ void i915_reset(struct drm_i915_private *dev_priv)
 	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
+	disable_irq(dev_priv->drm.irq);
 	i915_gem_reset_prepare(dev_priv);
 
-	disable_engines_irq(dev_priv);
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
-	enable_engines_irq(dev_priv);
-
 	if (ret) {
 		if (ret != -ENODEV)
 			DRM_ERROR("Failed to reset chip: %i\n", ret);
@@ -1816,6 +1798,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 	i915_queue_hangcheck(dev_priv);
 
 wakeup:
+	enable_irq(dev_priv->drm.irq);
 	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
 	return;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d4c59b53532e..94450621e6cb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2631,6 +2631,13 @@ static void reset_request(struct drm_i915_gem_request *request)
 
 void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* Ensure irq handler finishes, and not run again. */
+	for_each_engine(engine, dev_priv, id)
+		tasklet_kill(&engine->irq_tasklet);
+
 	i915_gem_revoke_fences(dev_priv);
 }
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery
  2017-01-16  9:42 [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Chris Wilson
@ 2017-01-16  9:42 ` Chris Wilson
  2017-01-16 11:18   ` Chris Wilson
  2017-01-16 15:00   ` [PATCH] " Chris Wilson
  2017-01-16 11:54 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 9+ messages in thread
From: Chris Wilson @ 2017-01-16  9:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

If we can't recover the GPU after the reset, mark it as wedged to cancel
the outstanding tasks and to prevent new users from trying to use the
broken GPU.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c |  7 ++++++-
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++--
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index bb747aeb29aa..5ee62976ec98 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv)
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 	disable_irq(dev_priv->drm.irq);
-	i915_gem_reset_prepare(dev_priv);
+	ret = i915_gem_reset_prepare(dev_priv);
+	if (ret) {
+		DRM_ERROR("GPU recovery failed\n");
+		intel_gpu_reset(dev_priv, ALL_ENGINES);
+		goto error;
+	}
 
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f861418122ef..38509505424d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3328,7 +3328,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 94450621e6cb..beb3cd7e2f35 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2629,16 +2629,26 @@ static void reset_request(struct drm_i915_gem_request *request)
 	dma_fence_set_error(&request->fence, -EIO);
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	int err = 0;
 
 	/* Ensure irq handler finishes, and not run again. */
-	for_each_engine(engine, dev_priv, id)
+	for_each_engine(engine, dev_priv, id) {
+		struct drm_i915_gem_request *request;
+
 		tasklet_kill(&engine->irq_tasklet);
 
+		request = i915_gem_find_active_request(engine);
+		if (request && request->fence.error == -EIO)
+			err = -EIO; /* Previous reset failed! */
+	}
+
 	i915_gem_revoke_fences(dev_priv);
+
+	return err;
 }
 
 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery
  2017-01-16  9:42 ` [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery Chris Wilson
@ 2017-01-16 11:18   ` Chris Wilson
  2017-01-16 15:00   ` [PATCH] " Chris Wilson
  1 sibling, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2017-01-16 11:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

On Mon, Jan 16, 2017 at 09:42:52AM +0000, Chris Wilson wrote:
> If we can't recover the GPU after the reset, mark it as wedged to cancel
> the outstanding tasks and to prevent new users from trying to use the
> broken GPU.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> -void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
> +int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	int err = 0;
>  
>  	/* Ensure irq handler finishes, and not run again. */
> -	for_each_engine(engine, dev_priv, id)
> +	for_each_engine(engine, dev_priv, id) {
> +		struct drm_i915_gem_request *request;
> +
>  		tasklet_kill(&engine->irq_tasklet);
>  
> +		request = i915_gem_find_active_request(engine);
> +		if (request && request->fence.error == -EIO)
> +			err = -EIO; /* Previous reset failed! */

This should check that it is this engine that is declared as hung - as
we may not have given the GPU the chance to even execute the requests
from the previous reset.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare()
  2017-01-16  9:42 [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Chris Wilson
  2017-01-16  9:42 ` [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery Chris Wilson
@ 2017-01-16 11:54 ` Patchwork
  2017-01-16 14:03 ` [PATCH 1/2] " Mika Kuoppala
  2017-01-16 15:54 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() (rev2) Patchwork
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2017-01-16 11:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare()
URL   : https://patchwork.freedesktop.org/series/18048/
State : success

== Summary ==

Series 18048v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/18048/revisions/1/mbox/


fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:82   pass:69   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:246  pass:226  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:246  pass:222  dwarn:3   dfail:0   fail:0   skip:21 
fi-snb-2520m     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

8f5a13bb4605ce9d60e1f2cd2722c9e2854e6749 drm-tip: 2017y-01m-16d-09h-31m-14s UTC integration manifest
0690a58 drm/i915: Detect a failed GPU reset+recovery
5fb00c3 drm/i915: Move engine reset preparation to i915_gem_reset_prepare()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3525/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare()
  2017-01-16  9:42 [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Chris Wilson
  2017-01-16  9:42 ` [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery Chris Wilson
  2017-01-16 11:54 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Patchwork
@ 2017-01-16 14:03 ` Mika Kuoppala
  2017-01-16 14:18   ` Chris Wilson
  2017-01-16 15:54 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() (rev2) Patchwork
  3 siblings, 1 reply; 9+ messages in thread
From: Mika Kuoppala @ 2017-01-16 14:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Now that we have prepare/finish routines for the GEM reset, move the
> disabling of the engine->irq_tasklet into them to reduce repetition. The
> device irq enable/disable is split out to ensure it is run first and
> last always (even if the GPU reset fails).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>

As discussed briefly on IRC, the disabling could be part of
reset and the enabling part of init_hw; that way we could also
wedge with irqs off, even if it breaks the symmetry.

Regardless, this is an improvement.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.c | 21 ++-------------------
>  drivers/gpu/drm/i915/i915_gem.c |  7 +++++++
>  2 files changed, 9 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 4e5ea5898e06..bb747aeb29aa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1728,22 +1728,6 @@ static int i915_resume_switcheroo(struct drm_device *dev)
>  	return i915_drm_resume(dev);
>  }
>  
> -static void disable_engines_irq(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	/* Ensure irq handler finishes, and not run again. */
> -	disable_irq(dev_priv->drm.irq);
> -	for_each_engine(engine, dev_priv, id)
> -		tasklet_kill(&engine->irq_tasklet);
> -}
> -
> -static void enable_engines_irq(struct drm_i915_private *dev_priv)
> -{
> -	enable_irq(dev_priv->drm.irq);
> -}
> -
>  /**
>   * i915_reset - reset chip after a hang
>   * @dev_priv: device private to reset
> @@ -1776,12 +1760,10 @@ void i915_reset(struct drm_i915_private *dev_priv)
>  	error->reset_count++;
>  
>  	pr_notice("drm/i915: Resetting chip after gpu hang\n");
> +	disable_irq(dev_priv->drm.irq);
>  	i915_gem_reset_prepare(dev_priv);
>  
> -	disable_engines_irq(dev_priv);
>  	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
> -	enable_engines_irq(dev_priv);
> -
>  	if (ret) {
>  		if (ret != -ENODEV)
>  			DRM_ERROR("Failed to reset chip: %i\n", ret);
> @@ -1816,6 +1798,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
>  	i915_queue_hangcheck(dev_priv);
>  
>  wakeup:
> +	enable_irq(dev_priv->drm.irq);
>  	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
>  	return;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d4c59b53532e..94450621e6cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2631,6 +2631,13 @@ static void reset_request(struct drm_i915_gem_request *request)
>  
>  void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
>  {
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	/* Ensure irq handler finishes, and not run again. */
> +	for_each_engine(engine, dev_priv, id)
> +		tasklet_kill(&engine->irq_tasklet);
> +
>  	i915_gem_revoke_fences(dev_priv);
>  }
>  
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare()
  2017-01-16 14:03 ` [PATCH 1/2] " Mika Kuoppala
@ 2017-01-16 14:18   ` Chris Wilson
  2017-01-16 14:37     ` Mika Kuoppala
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2017-01-16 14:18 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Mon, Jan 16, 2017 at 04:03:16PM +0200, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Now that we have prepare/finish routines for the GEM reset, move the
> > disabling of the engine->irq_tasklet into them to reduce repetition. The
> > device irq enable/disable is split out to ensure it is run first and
> > last always (even if the GPU reset fails).
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> 
> As discussed briefly in irc, the disabling could be part of
> reset and enabling part of init_hw, that way we could wedge
> also with irq's off, even if it breaks the symmetry.

We can't leave the irq off when wedged. It's the device IRQ, so hotplug,
gmbus and all sorts of transactions, not just our own user +
context-switch interrupts. Did you have something else in mind?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare()
  2017-01-16 14:18   ` Chris Wilson
@ 2017-01-16 14:37     ` Mika Kuoppala
  0 siblings, 0 replies; 9+ messages in thread
From: Mika Kuoppala @ 2017-01-16 14:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Mon, Jan 16, 2017 at 04:03:16PM +0200, Mika Kuoppala wrote:
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Now that we have prepare/finish routines for the GEM reset, move the
>> > disabling of the engine->irq_tasklet into them to reduce repetition. The
>> > device irq enable/disable is split out to ensure it is run first and
>> > last always (even if the GPU reset fails).
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> 
>> As discussed briefly in irc, the disabling could be part of
>> reset and enabling part of init_hw, that way we could wedge
>> also with irq's off, even if it breaks the symmetry.
>
> We can't leave the irq off when wedged. It's the device IRQ, so hotplug,
> gmbus and all sorts of transactions, not just our own user +
> context-switch interrupts. Did you have something else in mind?

Ah yes, indeed, that would be nasty for the display side. I was thinking
that doing the reset and the whole hw init while irqs are off would be
safer or more controlled.

-Mika

> -Chris
>
> -- 
> Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/i915: Detect a failed GPU reset+recovery
  2017-01-16  9:42 ` [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery Chris Wilson
  2017-01-16 11:18   ` Chris Wilson
@ 2017-01-16 15:00   ` Chris Wilson
  1 sibling, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2017-01-16 15:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

If we can't recover the GPU after the reset, mark it as wedged to cancel
the outstanding tasks and to prevent new users from trying to use the
broken GPU.

v2: Check the same ring is hung again before declaring the reset broken.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c |  7 ++++++-
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++++++++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index bb747aeb29aa..5ee62976ec98 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv)
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 	disable_irq(dev_priv->drm.irq);
-	i915_gem_reset_prepare(dev_priv);
+	ret = i915_gem_reset_prepare(dev_priv);
+	if (ret) {
+		DRM_ERROR("GPU recovery failed\n");
+		intel_gpu_reset(dev_priv, ALL_ENGINES);
+		goto error;
+	}
 
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f861418122ef..38509505424d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3328,7 +3328,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 021d1e97cc15..e9dfd320a0e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2629,16 +2629,28 @@ static void reset_request(struct drm_i915_gem_request *request)
 	dma_fence_set_error(&request->fence, -EIO);
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	int err = 0;
 
 	/* Ensure irq handler finishes, and not run again. */
-	for_each_engine(engine, dev_priv, id)
+	for_each_engine(engine, dev_priv, id) {
+		struct drm_i915_gem_request *request;
+
 		tasklet_kill(&engine->irq_tasklet);
 
+		if (engine->hangcheck.stalled) {
+			request = i915_gem_find_active_request(engine);
+			if (request && request->fence.error == -EIO)
+				err = -EIO; /* Previous reset failed! */
+		}
+	}
+
 	i915_gem_revoke_fences(dev_priv);
+
+	return err;
 }
 
 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() (rev2)
  2017-01-16  9:42 [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Chris Wilson
                   ` (2 preceding siblings ...)
  2017-01-16 14:03 ` [PATCH 1/2] " Mika Kuoppala
@ 2017-01-16 15:54 ` Patchwork
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2017-01-16 15:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() (rev2)
URL   : https://patchwork.freedesktop.org/series/18048/
State : success

== Summary ==

Series 18048v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/18048/revisions/2/mbox/


fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:82   pass:69   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:246  pass:226  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:246  pass:222  dwarn:3   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-snb-2520m     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

e0c7007e02b52375d3e5daa1bc4ef2e6d00e1016 drm-tip: 2017y-01m-16d-12h-26m-22s UTC integration manifest
6f862e7 drm/i915: Detect a failed GPU reset+recovery
ca09e28 drm/i915: Move engine reset preparation to i915_gem_reset_prepare()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3530/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-01-16 15:54 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-16  9:42 [PATCH 1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Chris Wilson
2017-01-16  9:42 ` [PATCH 2/2] drm/i915: Detect a failed GPU reset+recovery Chris Wilson
2017-01-16 11:18   ` Chris Wilson
2017-01-16 15:00   ` [PATCH] " Chris Wilson
2017-01-16 11:54 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() Patchwork
2017-01-16 14:03 ` [PATCH 1/2] " Mika Kuoppala
2017-01-16 14:18   ` Chris Wilson
2017-01-16 14:37     ` Mika Kuoppala
2017-01-16 15:54 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Move engine reset preparation to i915_gem_reset_prepare() (rev2) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.