[PATCH 1/2] drm/i915: Construct a request even if the GPU is currently hung

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] drm/i915: Construct a request even if the GPU is currently hung
@ 2017-01-14 16:23 Chris Wilson
  2017-01-14 16:23 ` [PATCH 2/2] drm/i915: Skip switch to kernel context if already done Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Chris Wilson @ 2017-01-14 16:23 UTC (permalink / raw)
  To: intel-gfx

As we now have the ability to directly reset the GPU from the waiter
(and so do not need to drop the lock in order to let the reset proceed)
and also do not lose requests over a reset, we can now simply queue the
request to occur after the reset rather than roundtripping to userspace
(or worse failing with EIO).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e614398abe2f..72b7f7d9461d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -307,26 +307,6 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
-{
-	struct i915_gpu_error *error = &dev_priv->gpu_error;
-
-	if (i915_terminally_wedged(error))
-		return -EIO;
-
-	if (i915_reset_in_progress(error)) {
-		/* Non-interruptible callers can't handle -EAGAIN, hence return
-		 * -EIO unconditionally for these.
-		 */
-		if (!dev_priv->mm.interruptible)
-			return -EIO;
-
-		return -EAGAIN;
-	}
-
-	return 0;
-}
-
 static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
 	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
@@ -521,12 +501,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
-	 * and restart.
+	 * EIO if the GPU is already wedged.
 	 */
-	ret = i915_gem_check_wedge(dev_priv);
-	if (ret)
-		return ERR_PTR(ret);
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		return ERR_PTR(-EIO);
 
 	/* Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/i915: Skip switch to kernel context if already done
  2017-01-14 16:23 [PATCH 1/2] drm/i915: Construct a request even if the GPU is currently hung Chris Wilson
@ 2017-01-14 16:23 ` Chris Wilson
  2017-01-16 11:45   ` Joonas Lahtinen
  2017-01-14 17:29 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Construct a request even if the GPU is currently hung Patchwork
  2017-01-16  9:31 ` [PATCH 1/2] " Joonas Lahtinen
  2 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2017-01-14 16:23 UTC (permalink / raw)
  To: intel-gfx

Some engines are never user or already sitting idle in the kernel
context and for those we can skip flushing the current context for
i915_gem_switch_to_kernel_context(). We used to perform this
optimisation but that was removed for convenience of converting over to
multiple timelines and handling the pending request queues.

From the perspective of writing selftests, reducing the number of
background operations on the engines makes defining assertions easier.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         |  3 ++-
 drivers/gpu/drm/i915/i915_gem_context.c | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d4c59b53532e..83cd2eff37af 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4140,7 +4140,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
 	enum intel_engine_id id;
 
 	for_each_engine(engine, dev_priv, id)
-		GEM_BUG_ON(!i915_gem_context_is_kernel(engine->last_retired_context));
+		GEM_BUG_ON(engine->last_retired_context &&
+			   !i915_gem_context_is_kernel(engine->last_retired_context));
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ae99c25397ca..0a4728fdecdc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -877,6 +877,26 @@ int i915_switch_context(struct drm_i915_gem_request *req)
 	return do_rcs_switch(req);
 }
 
+static bool engine_has_kernel_context(struct intel_engine_cs *engine)
+{
+	struct i915_gem_timeline *timeline;
+
+	list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
+		struct intel_timeline *tl;
+
+		if (timeline == &engine->i915->gt.global_timeline)
+			continue;
+
+		tl = &timeline->engine[engine->id];
+		if (i915_gem_active_peek(&tl->last_request,
+					 &engine->i915->drm.struct_mutex))
+			return false;
+	}
+
+	return (!engine->last_retired_context ||
+		i915_gem_context_is_kernel(engine->last_retired_context));
+}
+
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
@@ -885,10 +905,15 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	i915_gem_retire_requests(dev_priv);
+
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *req;
 		int ret;
 
+		if (engine_has_kernel_context(engine))
+			continue;
+
 		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Construct a request even if the GPU is currently hung
  2017-01-14 16:23 [PATCH 1/2] drm/i915: Construct a request even if the GPU is currently hung Chris Wilson
  2017-01-14 16:23 ` [PATCH 2/2] drm/i915: Skip switch to kernel context if already done Chris Wilson
@ 2017-01-14 17:29 ` Patchwork
  2017-01-16  9:31 ` [PATCH 1/2] " Joonas Lahtinen
  2 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2017-01-14 17:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Construct a request even if the GPU is currently hung
URL   : https://patchwork.freedesktop.org/series/18013/
State : success

== Summary ==

Series 18013v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/18013/revisions/1/mbox/

Test kms_pipe_crc_basic:
        Subgroup bad-nb-words-1:
                dmesg-warn -> PASS       (fi-ivb-3770)

fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:246  pass:224  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:82   pass:69   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:246  pass:226  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:246  pass:222  dwarn:3   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:246  pass:233  dwarn:0   dfail:0   fail:0   skip:13 
fi-snb-2520m     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

af723422378a6d17bd5b632880b195f8443445b5 drm-tip: 2017y-01m-14d-16h-21m-06s UTC integration manifest
1dd1272 drm/i915: Skip switch to kernel context if already done
c5a8c03 drm/i915: Construct a request even if the GPU is currently hung

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3520/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/i915: Construct a request even if the GPU is currently hung
  2017-01-14 16:23 [PATCH 1/2] drm/i915: Construct a request even if the GPU is currently hung Chris Wilson
  2017-01-14 16:23 ` [PATCH 2/2] drm/i915: Skip switch to kernel context if already done Chris Wilson
  2017-01-14 17:29 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Construct a request even if the GPU is currently hung Patchwork
@ 2017-01-16  9:31 ` Joonas Lahtinen
  2 siblings, 0 replies; 6+ messages in thread
From: Joonas Lahtinen @ 2017-01-16  9:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On la, 2017-01-14 at 16:23 +0000, Chris Wilson wrote:
> As we now have the ability to directly reset the GPU from the waiter
> (and so do not need to drop the lock in order to let the reset proceed)
> and also do not lose requests over a reset, we can now simply queue the
> request to occur after the reset rather than roundtripping to userspace
> (or worse failing with EIO).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] drm/i915: Skip switch to kernel context if already done
  2017-01-14 16:23 ` [PATCH 2/2] drm/i915: Skip switch to kernel context if already done Chris Wilson
@ 2017-01-16 11:45   ` Joonas Lahtinen
  0 siblings, 0 replies; 6+ messages in thread
From: Joonas Lahtinen @ 2017-01-16 11:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On la, 2017-01-14 at 16:23 +0000, Chris Wilson wrote:
> Some engines are never user or already sitting idle in the kernel
> context and for those we can skip flushing the current context for
> i915_gem_switch_to_kernel_context(). We used to perform this
> optimisation but that was removed for convenience of converting over to
> multiple timelines and handling the pending request queues.
> 
> From the perspective of writing selftests, reducing the number of
> background operations on the engines makes defining assertions easier.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/i915: Skip switch to kernel context if already done
  2017-01-10 12:06 Chris Wilson
@ 2017-01-10 12:06 ` Chris Wilson
  0 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2017-01-10 12:06 UTC (permalink / raw)
  To: intel-gfx

Some engines are never user or already sitting idle in the kernel
context and for those we can skip flushing the current context for
i915_gem_switch_to_kernel_context(). We used to perform this
optimisation but that was removed for convenience of converting over to
multiple timelines and handling the pending request queues.

From the perspective of writing selftests, reducing the number of
background operations on the engines makes defining assertions easier.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         |  3 ++-
 drivers/gpu/drm/i915/i915_gem_context.c | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dc4c73ca07e1..3050b4eed494 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4144,7 +4144,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
 	enum intel_engine_id id;
 
 	for_each_engine(engine, dev_priv, id)
-		GEM_BUG_ON(!i915_gem_context_is_kernel(engine->last_retired_context));
+		GEM_BUG_ON(!(engine->last_retired_context == NULL ||
+			     i915_gem_context_is_kernel(engine->last_retired_context)));
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 40a6939e3956..a8f6b8843bdf 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -897,6 +897,26 @@ int i915_switch_context(struct drm_i915_gem_request *req)
 	return do_rcs_switch(req);
 }
 
+static bool engine_has_kernel_context(struct intel_engine_cs *engine)
+{
+	struct i915_gem_timeline *timeline;
+
+	list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
+		struct intel_timeline *tl;
+
+		if (timeline == &engine->i915->gt.global_timeline)
+			continue;
+
+		tl = &timeline->engine[engine->id];
+		if (i915_gem_active_peek(&tl->last_request,
+					 &engine->i915->drm.struct_mutex))
+			return false;
+	}
+
+	return (engine->last_retired_context == NULL ||
+		i915_gem_context_is_kernel(engine->last_retired_context));
+}
+
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
@@ -905,10 +925,15 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	i915_gem_retire_requests(dev_priv);
+
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *req;
 		int ret;
 
+		if (engine_has_kernel_context(engine))
+			continue;
+
 		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-01-16 11:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-14 16:23 [PATCH 1/2] drm/i915: Construct a request even if the GPU is currently hung Chris Wilson
2017-01-14 16:23 ` [PATCH 2/2] drm/i915: Skip switch to kernel context if already done Chris Wilson
2017-01-16 11:45   ` Joonas Lahtinen
2017-01-14 17:29 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Construct a request even if the GPU is currently hung Patchwork
2017-01-16  9:31 ` [PATCH 1/2] " Joonas Lahtinen
  -- strict thread matches above, loose matches on Subject: below --
2017-01-10 12:06 Chris Wilson
2017-01-10 12:06 ` [PATCH 2/2] drm/i915: Skip switch to kernel context if already done Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.