All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
@ 2019-03-08  9:36 Chris Wilson
  2019-03-08  9:36 ` [PATCH 2/4] drm/i915: Refactor common code to load initial power context Chris Wilson
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: Chris Wilson @ 2019-03-08  9:36 UTC (permalink / raw)
  To: intel-gfx

When the system idles, we switch to the kernel context as a defensive
measure (no users are harmed if the kernel context is lost). Currently,
we issue a switch to kernel context and then come back later to see if
the kernel context is still current and the system is idle. However,
if we are no longer privy to the runqueue ordering, then we have to
relax our assumptions about the logical state of the GPU and the only
way to ensure that the kernel context is currently loaded is by issuing
a request to run after all others, and wait for it to complete all while
preventing anyone else from issuing their own requests.

v2: Pull wedging into switch_to_kernel_context_sync() but only after
waiting (though only for the same short delay) for the active context to
finish.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c           |  14 +-
 drivers/gpu/drm/i915/i915_drv.h           |   2 +-
 drivers/gpu/drm/i915/i915_gem.c           | 154 +++++++++-------------
 drivers/gpu/drm/i915/i915_gem_context.c   |   4 +
 drivers/gpu/drm/i915/selftests/i915_gem.c |   9 +-
 5 files changed, 73 insertions(+), 110 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 777a9a19414d..0d743907e7bc 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -714,8 +714,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	return 0;
 
 cleanup_gem:
-	if (i915_gem_suspend(dev_priv))
-		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
+	i915_gem_suspend(dev_priv);
 	i915_gem_fini(dev_priv);
 cleanup_modeset:
 	intel_modeset_cleanup(dev);
@@ -1900,8 +1899,7 @@ void i915_driver_unload(struct drm_device *dev)
 	/* Flush any external code that still may be under the RCU lock */
 	synchronize_rcu();
 
-	if (i915_gem_suspend(dev_priv))
-		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
+	i915_gem_suspend(dev_priv);
 
 	drm_atomic_helper_shutdown(dev);
 
@@ -2009,7 +2007,6 @@ static bool suspend_to_idle(struct drm_i915_private *dev_priv)
 static int i915_drm_prepare(struct drm_device *dev)
 {
 	struct drm_i915_private *i915 = to_i915(dev);
-	int err;
 
 	/*
 	 * NB intel_display_suspend() may issue new requests after we've
@@ -2017,12 +2014,9 @@ static int i915_drm_prepare(struct drm_device *dev)
 	 * split out that work and pull it forward so that after point,
 	 * the GPU is not woken again.
 	 */
-	err = i915_gem_suspend(i915);
-	if (err)
-		dev_err(&i915->drm.pdev->dev,
-			"GEM idle failed, suspend/resume might fail\n");
+	i915_gem_suspend(i915);
 
-	return err;
+	return 0;
 }
 
 static int i915_drm_suspend(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a5b314a0c415..b8a5281d8adf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3032,7 +3032,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags, long timeout);
-int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
+void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index df2f4f65c2a4..f22de3b5a1f3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2828,13 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
 				   round_jiffies_up_relative(HZ));
 }
 
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
-	return (READ_ONCE(i915->gt.active_requests) ||
-		work_pending(&i915->gt.idle_work.work));
-}
-
 static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -2843,7 +2836,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	if (i915_reset_failed(i915))
 		return;
 
-	GEM_BUG_ON(i915->gt.active_requests);
+	i915_retire_requests(i915);
+
 	for_each_engine(engine, i915, id) {
 		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
 		GEM_BUG_ON(engine->last_retired_context !=
@@ -2851,77 +2845,86 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	}
 }
 
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	bool result = true;
+
+	/*
+	 * Even if we fail to switch, give whatever is running a small chance
+	 * to save itself before we report the failure. Yes, this may be a
+	 * false positive due to e.g. ENOMEM, caveat emptor!
+	 */
+	if (i915_gem_switch_to_kernel_context(i915))
+		result = false;
+
+	if (i915_gem_wait_for_idle(i915,
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   I915_GEM_IDLE_TIMEOUT))
+		result = false;
+
+	if (result) {
+		assert_kernel_context_is_current(i915);
+	} else {
+		/* Forcibly cancel outstanding work and leave the gpu quiet. */
+		dev_err(i915->drm.dev,
+			"Failed to idle engines, declaring wedged!\n");
+		GEM_TRACE_DUMP();
+		i915_gem_set_wedged(i915);
+	}
+
+	i915_retire_requests(i915); /* ensure we flush after wedging */
+	return result;
+}
+
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.idle_work.work);
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gt.idle_work.work);
 	bool rearm_hangcheck;
 
-	if (!READ_ONCE(dev_priv->gt.awake))
+	if (!READ_ONCE(i915->gt.awake))
 		return;
 
-	if (READ_ONCE(dev_priv->gt.active_requests))
+	if (READ_ONCE(i915->gt.active_requests))
 		return;
 
-	/*
-	 * Flush out the last user context, leaving only the pinned
-	 * kernel context resident. When we are idling on the kernel_context,
-	 * no more new requests (with a context switch) are emitted and we
-	 * can finally rest. A consequence is that the idle work handler is
-	 * always called at least twice before idling (and if the system is
-	 * idle that implies a round trip through the retire worker).
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_gem_switch_to_kernel_context(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
-		  READ_ONCE(dev_priv->gt.active_requests));
-
-	/*
-	 * Wait for last execlists context complete, but bail out in case a
-	 * new request is submitted. As we don't trust the hardware, we
-	 * continue on if the wait times out. This is necessary to allow
-	 * the machine to suspend even if the hardware dies, and we will
-	 * try to recover in resume (after depriving the hardware of power,
-	 * it may be in a better mmod).
-	 */
-	__wait_for(if (new_requests_since_last_retire(dev_priv)) return,
-		   intel_engines_are_idle(dev_priv),
-		   I915_IDLE_ENGINES_TIMEOUT * 1000,
-		   10, 500);
-
 	rearm_hangcheck =
-		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
-	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+	if (!mutex_trylock(&i915->drm.struct_mutex)) {
 		/* Currently busy, come back later */
-		mod_delayed_work(dev_priv->wq,
-				 &dev_priv->gt.idle_work,
+		mod_delayed_work(i915->wq,
+				 &i915->gt.idle_work,
 				 msecs_to_jiffies(50));
 		goto out_rearm;
 	}
 
 	/*
-	 * New request retired after this work handler started, extend active
-	 * period until next instance of the work.
+	 * Flush out the last user context, leaving only the pinned
+	 * kernel context resident. Should anything unfortunate happen
+	 * while we are idle (such as the GPU being power cycled), no users
+	 * will be harmed.
 	 */
-	if (new_requests_since_last_retire(dev_priv))
-		goto out_unlock;
+	if (!work_pending(&i915->gt.idle_work.work) &&
+	    !i915->gt.active_requests) {
+		++i915->gt.active_requests; /* don't requeue idle */
 
-	__i915_gem_park(dev_priv);
+		switch_to_kernel_context_sync(i915);
 
-	assert_kernel_context_is_current(dev_priv);
+		if (!--i915->gt.active_requests) {
+			__i915_gem_park(i915);
+			rearm_hangcheck = false;
+		}
+	}
 
-	rearm_hangcheck = false;
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 out_rearm:
 	if (rearm_hangcheck) {
-		GEM_BUG_ON(!dev_priv->gt.awake);
-		i915_queue_hangcheck(dev_priv);
+		GEM_BUG_ON(!i915->gt.awake);
+		i915_queue_hangcheck(i915);
 	}
 }
 
@@ -3128,7 +3131,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			return err;
 
 		i915_retire_requests(i915);
-		GEM_BUG_ON(i915->gt.active_requests);
 	}
 
 	return 0;
@@ -4340,10 +4342,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 }
 
-int i915_gem_suspend(struct drm_i915_private *i915)
+void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
-	int ret;
 
 	GEM_TRACE("\n");
 
@@ -4363,23 +4364,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	if (!i915_reset_failed(i915)) {
-		ret = i915_gem_switch_to_kernel_context(i915);
-		if (ret)
-			goto err_unlock;
-
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE |
-					     I915_WAIT_LOCKED |
-					     I915_WAIT_FOR_IDLE_BOOST,
-					     I915_GEM_IDLE_TIMEOUT);
-		if (ret == -EINTR)
-			goto err_unlock;
-
-		/* Forcibly cancel outstanding work and leave the gpu quiet. */
-		i915_gem_set_wedged(i915);
-	}
-	i915_retire_requests(i915); /* ensure we flush after wedging */
+	switch_to_kernel_context_sync(i915);
 
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_reset_flush(i915);
@@ -4399,12 +4384,6 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	GEM_BUG_ON(i915->gt.awake);
 
 	intel_runtime_pm_put(i915, wakeref);
-	return 0;
-
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
-	return ret;
 }
 
 void i915_gem_suspend_late(struct drm_i915_private *i915)
@@ -4670,20 +4649,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		goto err_active;
-
-	if (i915_gem_wait_for_idle(i915,
-				   I915_WAIT_LOCKED,
-				   I915_GEM_IDLE_TIMEOUT)) {
-		i915_gem_set_wedged(i915);
+	if (!switch_to_kernel_context_sync(i915)) {
 		err = -EIO; /* Caller will declare us wedged */
 		goto err_active;
 	}
 
-	assert_kernel_context_is_current(i915);
-
 	/*
 	 * Immediately park the GPU so that we enable powersaving and
 	 * treat it as idle. The next time we issue a request, we will
@@ -4927,7 +4897,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_init_hw:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	WARN_ON(i915_gem_suspend(dev_priv));
+	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
 	i915_gem_drain_workqueue(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b9f321947982..9a3eb4f66d85 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -767,6 +767,10 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915->kernel_context);
 
+	/* Inoperable, so presume the GPU is safely pointing into the void! */
+	if (i915_terminally_wedged(i915))
+		return 0;
+
 	i915_retire_requests(i915);
 
 	for_each_engine(engine, i915, id) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index e77b7ed449ae..50bb7bbd26d3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -84,14 +84,9 @@ static void simulate_hibernate(struct drm_i915_private *i915)
 
 static int pm_prepare(struct drm_i915_private *i915)
 {
-	int err = 0;
-
-	if (i915_gem_suspend(i915)) {
-		pr_err("i915_gem_suspend failed\n");
-		err = -EINVAL;
-	}
+	i915_gem_suspend(i915);
 
-	return err;
+	return 0;
 }
 
 static void pm_suspend(struct drm_i915_private *i915)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] drm/i915: Refactor common code to load initial power context
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
@ 2019-03-08  9:36 ` Chris Wilson
  2019-03-08  9:36 ` [PATCH 3/4] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2019-03-08  9:36 UTC (permalink / raw)
  To: intel-gfx

We load a context (the kernel context) on both module load and resume in
order to initialise some logical state onto the GPU. We can use the same
routine for both operations, which will become more useful as we
refactor rc6/rps enabling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 48 ++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f22de3b5a1f3..539ee78f6d9a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2877,6 +2877,22 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 	return result;
 }
 
+static bool load_power_context(struct drm_i915_private *i915)
+{
+	if (!switch_to_kernel_context_sync(i915))
+		return false;
+
+	/*
+	 * Immediately park the GPU so that we enable powersaving and
+	 * treat it as idle. The next time we issue a request, we will
+	 * unpark and start using the engine->pinned_default_state, otherwise
+	 * it is in limbo and an early reset may fail.
+	 */
+	__i915_gem_park(i915);
+
+	return true;
+}
+
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
@@ -4451,7 +4467,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	intel_uc_resume(i915);
 
 	/* Always reload a context for powersaving. */
-	if (i915_gem_switch_to_kernel_context(i915))
+	if (!load_power_context(i915))
 		goto err_wedged;
 
 out_unlock:
@@ -4616,7 +4632,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	int err;
+	int err = 0;
 
 	/*
 	 * As we reset the gpu during very early sanitisation, the current
@@ -4649,19 +4665,12 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	if (!switch_to_kernel_context_sync(i915)) {
-		err = -EIO; /* Caller will declare us wedged */
+	/* Flush the default context image to memory, and enable powersaving. */
+	if (!load_power_context(i915)) {
+		err = -EIO;
 		goto err_active;
 	}
 
-	/*
-	 * Immediately park the GPU so that we enable powersaving and
-	 * treat it as idle. The next time we issue a request, we will
-	 * unpark and start using the engine->pinned_default_state, otherwise
-	 * it is in limbo and an early reset may fail.
-	 */
-	__i915_gem_park(i915);
-
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *state;
 		void *vaddr;
@@ -4727,19 +4736,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 err_active:
 	/*
 	 * If we have to abandon now, we expect the engines to be idle
-	 * and ready to be torn-down. First try to flush any remaining
-	 * request, ensure we are pointing at the kernel context and
-	 * then remove it.
+	 * and ready to be torn-down. The quickest way we can accomplish
+	 * this is by declaring ourselves wedged.
 	 */
-	if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
-		goto out_ctx;
-
-	if (WARN_ON(i915_gem_wait_for_idle(i915,
-					   I915_WAIT_LOCKED,
-					   MAX_SCHEDULE_TIMEOUT)))
-		goto out_ctx;
-
-	i915_gem_contexts_lost(i915);
+	i915_gem_set_wedged(i915);
 	goto out_ctx;
 }
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] drm/i915: Reduce presumption of request ordering for barriers
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
  2019-03-08  9:36 ` [PATCH 2/4] drm/i915: Refactor common code to load initial power context Chris Wilson
@ 2019-03-08  9:36 ` Chris Wilson
  2019-03-08  9:36 ` [PATCH 4/4] drm/i915: Remove has-kernel-context Chris Wilson
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2019-03-08  9:36 UTC (permalink / raw)
  To: intel-gfx

Currently we assume that we know the order in which requests run and so
can determine if we need to reissue a switch-to-kernel-context prior to
idling. That assumption does not hold for the future, so instead of
tracking which barriers have been used, simply determine if we have ever
switched away from the kernel context by using the engine and before
idling ensure that all engines that have been used since the last idle
are synchronously switched back to the kernel context for safety (and
to allow shrinking of memory while idle).

v2: Use intel_engine_mask_t and ALL_ENGINES

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 12 ++--
 drivers/gpu/drm/i915/i915_gem_context.c       | 66 +------------------
 drivers/gpu/drm/i915/i915_gem_context.h       |  3 +-
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +-
 drivers/gpu/drm/i915/i915_request.c           |  1 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |  5 ++
 .../gpu/drm/i915/selftests/i915_gem_context.c |  3 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  2 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  4 ++
 10 files changed, 27 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b8a5281d8adf..c4ffe19ec698 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1995,6 +1995,7 @@ struct drm_i915_private {
 			struct list_head hwsp_free_list;
 		} timelines;
 
+		intel_engine_mask_t active_engines;
 		struct list_head active_rings;
 		struct list_head closed_vma;
 		u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 539ee78f6d9a..961237b90b40 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2845,7 +2845,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	}
 }
 
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
+					  unsigned long mask)
 {
 	bool result = true;
 
@@ -2854,7 +2855,7 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 	 * to save itself before we report the failure. Yes, this may be a
 	 * false positive due to e.g. ENOMEM, caveat emptor!
 	 */
-	if (i915_gem_switch_to_kernel_context(i915))
+	if (i915_gem_switch_to_kernel_context(i915, mask))
 		result = false;
 
 	if (i915_gem_wait_for_idle(i915,
@@ -2879,7 +2880,8 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 
 static bool load_power_context(struct drm_i915_private *i915)
 {
-	if (!switch_to_kernel_context_sync(i915))
+	/* Force loading the kernel context on all engines */
+	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
 		return false;
 
 	/*
@@ -2927,7 +2929,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	    !i915->gt.active_requests) {
 		++i915->gt.active_requests; /* don't requeue idle */
 
-		switch_to_kernel_context_sync(i915);
+		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
 
 		if (!--i915->gt.active_requests) {
 			__i915_gem_park(i915);
@@ -4380,7 +4382,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(i915);
+	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
 
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_reset_flush(i915);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9a3eb4f66d85..486203e9d205 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -704,63 +704,10 @@ last_request_on_engine(struct i915_timeline *timeline,
 	return NULL;
 }
 
-static bool engine_has_kernel_context_barrier(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	const struct intel_context * const ce =
-		to_intel_context(i915->kernel_context, engine);
-	struct i915_timeline *barrier = ce->ring->timeline;
-	struct intel_ring *ring;
-	bool any_active = false;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
-		struct i915_request *rq;
-
-		rq = last_request_on_engine(ring->timeline, engine);
-		if (!rq)
-			continue;
-
-		any_active = true;
-
-		if (rq->hw_context == ce)
-			continue;
-
-		/*
-		 * Was this request submitted after the previous
-		 * switch-to-kernel-context?
-		 */
-		if (!i915_timeline_sync_is_later(barrier, &rq->fence)) {
-			GEM_TRACE("%s needs barrier for %llx:%lld\n",
-				  ring->timeline->name,
-				  rq->fence.context,
-				  rq->fence.seqno);
-			return false;
-		}
-
-		GEM_TRACE("%s has barrier after %llx:%lld\n",
-			  ring->timeline->name,
-			  rq->fence.context,
-			  rq->fence.seqno);
-	}
-
-	/*
-	 * If any other timeline was still active and behind the last barrier,
-	 * then our last switch-to-kernel-context must still be queued and
-	 * will run last (leaving the engine in the kernel context when it
-	 * eventually idles).
-	 */
-	if (any_active)
-		return true;
-
-	/* The engine is idle; check that it is idling in the kernel context. */
-	return engine->last_retired_context == ce;
-}
-
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
+int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
+				      unsigned long mask)
 {
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 
 	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
 
@@ -771,17 +718,11 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(i915))
 		return 0;
 
-	i915_retire_requests(i915);
-
-	for_each_engine(engine, i915, id) {
+	for_each_engine_masked(engine, i915, mask, mask) {
 		struct intel_ring *ring;
 		struct i915_request *rq;
 
 		GEM_BUG_ON(!to_intel_context(i915->kernel_context, engine));
-		if (engine_has_kernel_context_barrier(engine))
-			continue;
-
-		GEM_TRACE("emit barrier on %s\n", engine->name);
 
 		rq = i915_request_alloc(engine, i915->kernel_context);
 		if (IS_ERR(rq))
@@ -805,7 +746,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
 							 &prev->submit,
 							 I915_FENCE_GFP);
-			i915_timeline_sync_set(rq->timeline, &prev->fence);
 		}
 
 		i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 2f9ef333acaa..e1188d77a23d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -372,7 +372,8 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 void i915_gem_context_close(struct drm_file *file);
 
 int i915_switch_context(struct i915_request *rq);
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv);
+int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
+				      unsigned long engine_mask);
 
 void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 68d74c50ac39..7d8e90dfca84 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -62,7 +62,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	err = i915_gem_switch_to_kernel_context(i915);
+	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index f8a63495114c..9533a85cb0b3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1068,6 +1068,7 @@ void i915_request_add(struct i915_request *request)
 		GEM_TRACE("marking %s as active\n", ring->timeline->name);
 		list_add(&ring->active_link, &request->i915->gt.active_rings);
 	}
+	request->i915->gt.active_engines |= request->engine->mask;
 	request->emitted_jiffies = jiffies;
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 555a4590fa23..18174f808fd8 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1106,6 +1106,9 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
+	if (!engine->context_size)
+		return true;
+
 	/*
 	 * Check the last context seen by the engine. If active, it will be
 	 * the last request that remains in the timeline. When idle, it is
@@ -1205,6 +1208,8 @@ void intel_engines_park(struct drm_i915_private *i915)
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 		engine->execlists.no_priolist = false;
 	}
+
+	i915->gt.active_engines = 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 755c4a7304b2..5b8614b2fbe4 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1512,7 +1512,8 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
 			}
 		}
 
-		err = i915_gem_switch_to_kernel_context(i915);
+		err = i915_gem_switch_to_kernel_context(i915,
+							i915->gt.active_engines);
 		if (err)
 			return err;
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index e0d3122fd35a..94aee4071a66 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -14,7 +14,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 	cond_resched();
 
 	if (flags & I915_WAIT_LOCKED &&
-	    i915_gem_switch_to_kernel_context(i915)) {
+	    i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) {
 		pr_err("Failed to switch back to kernel context; declaring wedged\n");
 		i915_gem_set_wedged(i915);
 	}
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index b2c7808e0595..54cfb611c0aa 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -109,6 +109,10 @@ static void mock_retire_work_handler(struct work_struct *work)
 
 static void mock_idle_work_handler(struct work_struct *work)
 {
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gt.idle_work.work);
+
+	i915->gt.active_engines = 0;
 }
 
 static int pm_domain_resume(struct device *dev)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] drm/i915: Remove has-kernel-context
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
  2019-03-08  9:36 ` [PATCH 2/4] drm/i915: Refactor common code to load initial power context Chris Wilson
  2019-03-08  9:36 ` [PATCH 3/4] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
@ 2019-03-08  9:36 ` Chris Wilson
  2019-03-08  9:50 ` [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Tvrtko Ursulin
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2019-03-08  9:36 UTC (permalink / raw)
  To: intel-gfx

We can no longer assume execution ordering, and in particular we cannot
assume which context will execute last. One side-effect of this is that
we cannot determine if the kernel-context is resident on the GPU, so
remove the routines that claimed to do so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_active.h      | 13 -----------
 drivers/gpu/drm/i915/i915_gem.c         | 21 +----------------
 drivers/gpu/drm/i915/i915_gem_evict.c   | 16 +++----------
 drivers/gpu/drm/i915/intel_engine_cs.c  | 31 -------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 5 files changed, 4 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 8142a334b37b..7d758719ce39 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -108,19 +108,6 @@ i915_active_request_set_retire_fn(struct i915_active_request *active,
 	active->retire = fn ?: i915_active_retire_noop;
 }
 
-static inline struct i915_request *
-__i915_active_request_peek(const struct i915_active_request *active)
-{
-	/*
-	 * Inside the error capture (running with the driver in an unknown
-	 * state), we want to bend the rules slightly (a lot).
-	 *
-	 * Work is in progress to make it safer, in the meantime this keeps
-	 * the known issue from spamming the logs.
-	 */
-	return rcu_dereference_protected(active->request, 1);
-}
-
 /**
  * i915_active_request_raw - return the active request
  * @active - the active tracker
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 961237b90b40..1f1849f90606 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2828,23 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
 				   round_jiffies_up_relative(HZ));
 }
 
-static void assert_kernel_context_is_current(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (i915_reset_failed(i915))
-		return;
-
-	i915_retire_requests(i915);
-
-	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
-		GEM_BUG_ON(engine->last_retired_context !=
-			   to_intel_context(i915->kernel_context, engine));
-	}
-}
-
 static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
 					  unsigned long mask)
 {
@@ -2864,9 +2847,7 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
 				   I915_GEM_IDLE_TIMEOUT))
 		result = false;
 
-	if (result) {
-		assert_kernel_context_is_current(i915);
-	} else {
+	if (!result) {
 		/* Forcibly cancel outstanding work and leave the gpu quiet. */
 		dev_err(i915->drm.dev,
 			"Failed to idle engines, declaring wedged!\n");
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 7d8e90dfca84..060f5903544a 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -38,25 +38,15 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 
 static bool ggtt_is_idle(struct drm_i915_private *i915)
 {
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
-       if (i915->gt.active_requests)
-	       return false;
-
-       for_each_engine(engine, i915, id) {
-	       if (!intel_engine_has_kernel_context(engine))
-		       return false;
-       }
-
-       return true;
+	return !i915->gt.active_requests;
 }
 
 static int ggtt_flush(struct drm_i915_private *i915)
 {
 	int err;
 
-	/* Not everything in the GGTT is tracked via vma (otherwise we
+	/*
+	 * Not everything in the GGTT is tracked via vma (otherwise we
 	 * could evict as required with minimal stalling) so we are forced
 	 * to idle the GPU and explicitly retire outstanding requests in
 	 * the hopes that we can then remove contexts and the like only
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 18174f808fd8..8e326556499e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1090,37 +1090,6 @@ bool intel_engines_are_idle(struct drm_i915_private *i915)
 	return true;
 }
 
-/**
- * intel_engine_has_kernel_context:
- * @engine: the engine
- *
- * Returns true if the last context to be executed on this engine, or has been
- * executed if the engine is already idle, is the kernel context
- * (#i915.kernel_context).
- */
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
-{
-	const struct intel_context *kernel_context =
-		to_intel_context(engine->i915->kernel_context, engine);
-	struct i915_request *rq;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	if (!engine->context_size)
-		return true;
-
-	/*
-	 * Check the last context seen by the engine. If active, it will be
-	 * the last request that remains in the timeline. When idle, it is
-	 * the last executed context as tracked by retirement.
-	 */
-	rq = __i915_active_request_peek(&engine->timeline.last_request);
-	if (rq)
-		return rq->hw_context == kernel_context;
-	else
-		return engine->last_retired_context == kernel_context;
-}
-
 void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 84b7047e2df5..9ccbe63d46e3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -935,7 +935,6 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
 void intel_engine_lost_context(struct intel_engine_cs *engine);
 
 void intel_engines_park(struct drm_i915_private *i915);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
                   ` (2 preceding siblings ...)
  2019-03-08  9:36 ` [PATCH 4/4] drm/i915: Remove has-kernel-context Chris Wilson
@ 2019-03-08  9:50 ` Tvrtko Ursulin
  2019-03-08 10:22 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/4] " Patchwork
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2019-03-08  9:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/03/2019 09:36, Chris Wilson wrote:
> When the system idles, we switch to the kernel context as a defensive
> measure (no users are harmed if the kernel context is lost). Currently,
> we issue a switch to kernel context and then come back later to see if
> the kernel context is still current and the system is idle. However,
> if we are no longer privy to the runqueue ordering, then we have to
> relax our assumptions about the logical state of the GPU and the only
> way to ensure that the kernel context is currently loaded is by issuing
> a request to run after all others, and wait for it to complete all while
> preventing anyone else from issuing their own requests.
> 
> v2: Pull wedging into switch_to_kernel_context_sync() but only after
> waiting (though only for the same short delay) for the active context to
> finish.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.c           |  14 +-
>   drivers/gpu/drm/i915/i915_drv.h           |   2 +-
>   drivers/gpu/drm/i915/i915_gem.c           | 154 +++++++++-------------
>   drivers/gpu/drm/i915/i915_gem_context.c   |   4 +
>   drivers/gpu/drm/i915/selftests/i915_gem.c |   9 +-
>   5 files changed, 73 insertions(+), 110 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 777a9a19414d..0d743907e7bc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -714,8 +714,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
>   	return 0;
>   
>   cleanup_gem:
> -	if (i915_gem_suspend(dev_priv))
> -		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
> +	i915_gem_suspend(dev_priv);
>   	i915_gem_fini(dev_priv);
>   cleanup_modeset:
>   	intel_modeset_cleanup(dev);
> @@ -1900,8 +1899,7 @@ void i915_driver_unload(struct drm_device *dev)
>   	/* Flush any external code that still may be under the RCU lock */
>   	synchronize_rcu();
>   
> -	if (i915_gem_suspend(dev_priv))
> -		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
> +	i915_gem_suspend(dev_priv);
>   
>   	drm_atomic_helper_shutdown(dev);
>   
> @@ -2009,7 +2007,6 @@ static bool suspend_to_idle(struct drm_i915_private *dev_priv)
>   static int i915_drm_prepare(struct drm_device *dev)
>   {
>   	struct drm_i915_private *i915 = to_i915(dev);
> -	int err;
>   
>   	/*
>   	 * NB intel_display_suspend() may issue new requests after we've
> @@ -2017,12 +2014,9 @@ static int i915_drm_prepare(struct drm_device *dev)
>   	 * split out that work and pull it forward so that after point,
>   	 * the GPU is not woken again.
>   	 */
> -	err = i915_gem_suspend(i915);
> -	if (err)
> -		dev_err(&i915->drm.pdev->dev,
> -			"GEM idle failed, suspend/resume might fail\n");
> +	i915_gem_suspend(i915);
>   
> -	return err;
> +	return 0;
>   }
>   
>   static int i915_drm_suspend(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a5b314a0c415..b8a5281d8adf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3032,7 +3032,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv);
>   void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
>   int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
>   			   unsigned int flags, long timeout);
> -int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
> +void i915_gem_suspend(struct drm_i915_private *dev_priv);
>   void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
>   void i915_gem_resume(struct drm_i915_private *dev_priv);
>   vm_fault_t i915_gem_fault(struct vm_fault *vmf);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index df2f4f65c2a4..f22de3b5a1f3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2828,13 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
>   				   round_jiffies_up_relative(HZ));
>   }
>   
> -static inline bool
> -new_requests_since_last_retire(const struct drm_i915_private *i915)
> -{
> -	return (READ_ONCE(i915->gt.active_requests) ||
> -		work_pending(&i915->gt.idle_work.work));
> -}
> -
>   static void assert_kernel_context_is_current(struct drm_i915_private *i915)
>   {
>   	struct intel_engine_cs *engine;
> @@ -2843,7 +2836,8 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
>   	if (i915_reset_failed(i915))
>   		return;
>   
> -	GEM_BUG_ON(i915->gt.active_requests);
> +	i915_retire_requests(i915);
> +
>   	for_each_engine(engine, i915, id) {
>   		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
>   		GEM_BUG_ON(engine->last_retired_context !=
> @@ -2851,77 +2845,86 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
>   	}
>   }
>   
> +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
> +{
> +	bool result = true;
> +
> +	/*
> +	 * Even if we fail to switch, give whatever is running a small chance
> +	 * to save itself before we report the failure. Yes, this may be a
> +	 * false positive due to e.g. ENOMEM, caveat emptor!
> +	 */
> +	if (i915_gem_switch_to_kernel_context(i915))
> +		result = false;
> +
> +	if (i915_gem_wait_for_idle(i915,
> +				   I915_WAIT_LOCKED |
> +				   I915_WAIT_FOR_IDLE_BOOST,
> +				   I915_GEM_IDLE_TIMEOUT))
> +		result = false;
> +
> +	if (result) {
> +		assert_kernel_context_is_current(i915);
> +	} else {
> +		/* Forcibly cancel outstanding work and leave the gpu quiet. */
> +		dev_err(i915->drm.dev,
> +			"Failed to idle engines, declaring wedged!\n");
> +		GEM_TRACE_DUMP();
> +		i915_gem_set_wedged(i915);
> +	}
> +
> +	i915_retire_requests(i915); /* ensure we flush after wedging */
> +	return result;
> +}
> +
>   static void
>   i915_gem_idle_work_handler(struct work_struct *work)
>   {
> -	struct drm_i915_private *dev_priv =
> -		container_of(work, typeof(*dev_priv), gt.idle_work.work);
> +	struct drm_i915_private *i915 =
> +		container_of(work, typeof(*i915), gt.idle_work.work);
>   	bool rearm_hangcheck;
>   
> -	if (!READ_ONCE(dev_priv->gt.awake))
> +	if (!READ_ONCE(i915->gt.awake))
>   		return;
>   
> -	if (READ_ONCE(dev_priv->gt.active_requests))
> +	if (READ_ONCE(i915->gt.active_requests))
>   		return;
>   
> -	/*
> -	 * Flush out the last user context, leaving only the pinned
> -	 * kernel context resident. When we are idling on the kernel_context,
> -	 * no more new requests (with a context switch) are emitted and we
> -	 * can finally rest. A consequence is that the idle work handler is
> -	 * always called at least twice before idling (and if the system is
> -	 * idle that implies a round trip through the retire worker).
> -	 */
> -	mutex_lock(&dev_priv->drm.struct_mutex);
> -	i915_gem_switch_to_kernel_context(dev_priv);
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> -
> -	GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
> -		  READ_ONCE(dev_priv->gt.active_requests));
> -
> -	/*
> -	 * Wait for last execlists context complete, but bail out in case a
> -	 * new request is submitted. As we don't trust the hardware, we
> -	 * continue on if the wait times out. This is necessary to allow
> -	 * the machine to suspend even if the hardware dies, and we will
> -	 * try to recover in resume (after depriving the hardware of power,
> -	 * it may be in a better mmod).
> -	 */
> -	__wait_for(if (new_requests_since_last_retire(dev_priv)) return,
> -		   intel_engines_are_idle(dev_priv),
> -		   I915_IDLE_ENGINES_TIMEOUT * 1000,
> -		   10, 500);
> -
>   	rearm_hangcheck =
> -		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
> +		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
>   
> -	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
> +	if (!mutex_trylock(&i915->drm.struct_mutex)) {
>   		/* Currently busy, come back later */
> -		mod_delayed_work(dev_priv->wq,
> -				 &dev_priv->gt.idle_work,
> +		mod_delayed_work(i915->wq,
> +				 &i915->gt.idle_work,
>   				 msecs_to_jiffies(50));
>   		goto out_rearm;
>   	}
>   
>   	/*
> -	 * New request retired after this work handler started, extend active
> -	 * period until next instance of the work.
> +	 * Flush out the last user context, leaving only the pinned
> +	 * kernel context resident. Should anything unfortunate happen
> +	 * while we are idle (such as the GPU being power cycled), no users
> +	 * will be harmed.
>   	 */
> -	if (new_requests_since_last_retire(dev_priv))
> -		goto out_unlock;
> +	if (!work_pending(&i915->gt.idle_work.work) &&
> +	    !i915->gt.active_requests) {
> +		++i915->gt.active_requests; /* don't requeue idle */
>   
> -	__i915_gem_park(dev_priv);
> +		switch_to_kernel_context_sync(i915);
>   
> -	assert_kernel_context_is_current(dev_priv);
> +		if (!--i915->gt.active_requests) {
> +			__i915_gem_park(i915);
> +			rearm_hangcheck = false;
> +		}
> +	}
>   
> -	rearm_hangcheck = false;
> -out_unlock:
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	mutex_unlock(&i915->drm.struct_mutex);
>   
>   out_rearm:
>   	if (rearm_hangcheck) {
> -		GEM_BUG_ON(!dev_priv->gt.awake);
> -		i915_queue_hangcheck(dev_priv);
> +		GEM_BUG_ON(!i915->gt.awake);
> +		i915_queue_hangcheck(i915);
>   	}
>   }
>   
> @@ -3128,7 +3131,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
>   			return err;
>   
>   		i915_retire_requests(i915);
> -		GEM_BUG_ON(i915->gt.active_requests);
>   	}
>   
>   	return 0;
> @@ -4340,10 +4342,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>   	mutex_unlock(&i915->drm.struct_mutex);
>   }
>   
> -int i915_gem_suspend(struct drm_i915_private *i915)
> +void i915_gem_suspend(struct drm_i915_private *i915)
>   {
>   	intel_wakeref_t wakeref;
> -	int ret;
>   
>   	GEM_TRACE("\n");
>   
> @@ -4363,23 +4364,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>   	 * state. Fortunately, the kernel_context is disposable and we do
>   	 * not rely on its state.
>   	 */
> -	if (!i915_reset_failed(i915)) {
> -		ret = i915_gem_switch_to_kernel_context(i915);
> -		if (ret)
> -			goto err_unlock;
> -
> -		ret = i915_gem_wait_for_idle(i915,
> -					     I915_WAIT_INTERRUPTIBLE |
> -					     I915_WAIT_LOCKED |
> -					     I915_WAIT_FOR_IDLE_BOOST,
> -					     I915_GEM_IDLE_TIMEOUT);
> -		if (ret == -EINTR)
> -			goto err_unlock;
> -
> -		/* Forcibly cancel outstanding work and leave the gpu quiet. */
> -		i915_gem_set_wedged(i915);
> -	}
> -	i915_retire_requests(i915); /* ensure we flush after wedging */
> +	switch_to_kernel_context_sync(i915);
>   
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	i915_reset_flush(i915);
> @@ -4399,12 +4384,6 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>   	GEM_BUG_ON(i915->gt.awake);
>   
>   	intel_runtime_pm_put(i915, wakeref);
> -	return 0;
> -
> -err_unlock:
> -	mutex_unlock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_put(i915, wakeref);
> -	return ret;
>   }
>   
>   void i915_gem_suspend_late(struct drm_i915_private *i915)
> @@ -4670,20 +4649,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>   			goto err_active;
>   	}
>   
> -	err = i915_gem_switch_to_kernel_context(i915);
> -	if (err)
> -		goto err_active;
> -
> -	if (i915_gem_wait_for_idle(i915,
> -				   I915_WAIT_LOCKED,
> -				   I915_GEM_IDLE_TIMEOUT)) {
> -		i915_gem_set_wedged(i915);
> +	if (!switch_to_kernel_context_sync(i915)) {
>   		err = -EIO; /* Caller will declare us wedged */
>   		goto err_active;
>   	}
>   
> -	assert_kernel_context_is_current(i915);
> -
>   	/*
>   	 * Immediately park the GPU so that we enable powersaving and
>   	 * treat it as idle. The next time we issue a request, we will
> @@ -4927,7 +4897,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   err_init_hw:
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
> -	WARN_ON(i915_gem_suspend(dev_priv));
> +	i915_gem_suspend(dev_priv);
>   	i915_gem_suspend_late(dev_priv);
>   
>   	i915_gem_drain_workqueue(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b9f321947982..9a3eb4f66d85 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -767,6 +767,10 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   	GEM_BUG_ON(!i915->kernel_context);
>   
> +	/* Inoperable, so presume the GPU is safely pointing into the void! */
> +	if (i915_terminally_wedged(i915))
> +		return 0;
> +
>   	i915_retire_requests(i915);
>   
>   	for_each_engine(engine, i915, id) {
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index e77b7ed449ae..50bb7bbd26d3 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -84,14 +84,9 @@ static void simulate_hibernate(struct drm_i915_private *i915)
>   
>   static int pm_prepare(struct drm_i915_private *i915)
>   {
> -	int err = 0;
> -
> -	if (i915_gem_suspend(i915)) {
> -		pr_err("i915_gem_suspend failed\n");
> -		err = -EINVAL;
> -	}
> +	i915_gem_suspend(i915);
>   
> -	return err;
> +	return 0;
>   }
>   
>   static void pm_suspend(struct drm_i915_private *i915)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
                   ` (3 preceding siblings ...)
  2019-03-08  9:50 ` [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Tvrtko Ursulin
@ 2019-03-08 10:22 ` Patchwork
  2019-03-08 10:56 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2019-03-08 10:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
URL   : https://patchwork.freedesktop.org/series/57732/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Do a synchronous switch-to-kernel-context on idling
Okay!

Commit: drm/i915: Refactor common code to load initial power context
Okay!

Commit: drm/i915: Reduce presumption of request ordering for barriers
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3552:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3553:16: warning: expression using sizeof(void)

Commit: drm/i915: Remove has-kernel-context
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
                   ` (4 preceding siblings ...)
  2019-03-08 10:22 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/4] " Patchwork
@ 2019-03-08 10:56 ` Patchwork
  2019-03-08 12:33 ` ✗ Fi.CI.IGT: failure " Patchwork
  2019-03-08 12:44 ` [PATCH 1/4] " Chris Wilson
  7 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2019-03-08 10:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
URL   : https://patchwork.freedesktop.org/series/57732/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5720 -> Patchwork_12415
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/57732/revisions/1/mbox/

Known issues
------------

  Here are the changes found in Patchwork_12415 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_hangcheck:
    - fi-kbl-7560u:       PASS -> INCOMPLETE [fdo#108044] / [fdo#108744]

  * igt@kms_busy@basic-flip-b:
    - fi-gdg-551:         PASS -> FAIL [fdo#103182]

  * igt@kms_busy@basic-flip-c:
    - fi-byt-j1900:       NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_chamelium@hdmi-crc-fast:
    - fi-byt-j1900:       NOTRUN -> SKIP [fdo#109271] +52

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-byt-clapper:     PASS -> FAIL [fdo#107362] +1

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - fi-byt-clapper:     PASS -> FAIL [fdo#103191] / [fdo#107362] +1

  * igt@prime_vgem@basic-fence-flip:
    - fi-ilk-650:         PASS -> FAIL [fdo#104008]

  
#### Possible fixes ####

  * igt@i915_selftest@live_evict:
    - fi-bsw-kefka:       DMESG-WARN [fdo#107709] -> PASS

  
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#104008]: https://bugs.freedesktop.org/show_bug.cgi?id=104008
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#107709]: https://bugs.freedesktop.org/show_bug.cgi?id=107709
  [fdo#108044]: https://bugs.freedesktop.org/show_bug.cgi?id=108044
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278


Participating hosts (45 -> 42)
------------------------------

  Additional (1): fi-byt-j1900 
  Missing    (4): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


Build changes
-------------

    * Linux: CI_DRM_5720 -> Patchwork_12415

  CI_DRM_5720: 3539e9cecf9d8d32efd335f9e8656a547af9609f @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4876: 51b8d4cfde8d5b0176180b9683accea91474c7ff @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12415: e3a875da4f9f3f78842bc09dda0a781abb3d149c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

e3a875da4f9f drm/i915: Remove has-kernel-context
65f65a719bb8 drm/i915: Reduce presumption of request ordering for barriers
74219cb47fed drm/i915: Refactor common code to load initial power context
73f14b8dadc5 drm/i915: Do a synchronous switch-to-kernel-context on idling

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12415/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
                   ` (5 preceding siblings ...)
  2019-03-08 10:56 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-03-08 12:33 ` Patchwork
  2019-03-08 12:35   ` Chris Wilson
  2019-03-08 12:44 ` [PATCH 1/4] " Chris Wilson
  7 siblings, 1 reply; 12+ messages in thread
From: Patchwork @ 2019-03-08 12:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
URL   : https://patchwork.freedesktop.org/series/57732/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5720_full -> Patchwork_12415_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12415_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12415_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12415_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_eio@in-flight-suspend:
    - shard-kbl:          PASS -> DMESG-WARN
    - shard-skl:          PASS -> DMESG-WARN
    - shard-apl:          PASS -> DMESG-WARN
    - shard-snb:          PASS -> DMESG-WARN
    - shard-glk:          PASS -> DMESG-WARN
    - shard-hsw:          PASS -> DMESG-WARN

  
Known issues
------------

  Here are the changes found in Patchwork_12415_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@debugfs_test@read_all_entries:
    - shard-skl:          PASS -> INCOMPLETE [fdo#108901]

  * igt@gem_ctx_param@invalid-param-get:
    - shard-apl:          NOTRUN -> FAIL [fdo#109559]

  * igt@gem_softpin@softpin:
    - shard-hsw:          PASS -> INCOMPLETE [fdo#103540]

  * igt@i915_pm_rpm@debugfs-read:
    - shard-skl:          PASS -> INCOMPLETE [fdo#107807]

  * igt@kms_atomic_transition@3x-modeset-transitions:
    - shard-skl:          NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +2

  * igt@kms_busy@basic-flip-d:
    - shard-apl:          NOTRUN -> SKIP [fdo#109271] / [fdo#109278] +1

  * igt@kms_busy@extended-modeset-hang-newfb-render-a:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_color@pipe-c-ctm-green-to-red:
    - shard-skl:          PASS -> FAIL [fdo#107201]

  * igt@kms_color@pipe-c-legacy-gamma:
    - shard-apl:          PASS -> FAIL [fdo#104782]

  * igt@kms_cursor_crc@cursor-256x256-random:
    - shard-apl:          PASS -> FAIL [fdo#103232] +2

  * igt@kms_cursor_crc@cursor-size-change:
    - shard-glk:          PASS -> FAIL [fdo#103232]

  * igt@kms_draw_crc@draw-method-rgb565-mmap-cpu-xtiled:
    - shard-skl:          PASS -> FAIL [fdo#103184] +1

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-skl:          PASS -> FAIL [fdo#105363]

  * igt@kms_flip_tiling@flip-yf-tiled:
    - shard-skl:          PASS -> FAIL [fdo#108145]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-pwrite:
    - shard-apl:          PASS -> FAIL [fdo#103167] +1

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-indfb-msflip-blt:
    - shard-apl:          NOTRUN -> SKIP [fdo#109271] +32

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-pwrite:
    - shard-skl:          NOTRUN -> SKIP [fdo#109271] +17

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-wc:
    - shard-glk:          PASS -> FAIL [fdo#103167] +3

  * igt@kms_frontbuffer_tracking@fbc-tilingchange:
    - shard-snb:          PASS -> SKIP [fdo#109271]

  * igt@kms_plane@pixel-format-pipe-a-planes:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#106885]

  * igt@kms_plane@plane-panning-bottom-right-pipe-b-planes:
    - shard-skl:          PASS -> FAIL [fdo#103166]

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          PASS -> FAIL [fdo#107815] / [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    - shard-apl:          NOTRUN -> FAIL [fdo#108145]

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-none:
    - shard-apl:          PASS -> FAIL [fdo#103166] +5

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-y:
    - shard-glk:          PASS -> FAIL [fdo#103166]

  * igt@kms_rotation_crc@multiplane-rotation:
    - shard-kbl:          PASS -> FAIL [fdo#109016]

  * igt@prime_vgem@basic-fence-flip:
    - shard-kbl:          PASS -> FAIL [fdo#104008]

  
#### Possible fixes ####

  * igt@kms_atomic_transition@1x-modeset-transitions-nonblocking:
    - shard-apl:          FAIL [fdo#109660] -> PASS

  * igt@kms_cursor_crc@cursor-128x128-suspend:
    - shard-apl:          FAIL [fdo#103191] / [fdo#103232] -> PASS +1

  * igt@kms_cursor_crc@cursor-64x21-random:
    - shard-apl:          FAIL [fdo#103232] -> PASS +5

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          FAIL [fdo#105363] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-pwrite:
    - shard-apl:          FAIL [fdo#103167] -> PASS +2

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-pwrite:
    - shard-glk:          FAIL [fdo#103167] -> PASS +1

  * igt@kms_frontbuffer_tracking@fbc-farfromfence:
    - shard-snb:          SKIP [fdo#109271] -> PASS

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-yf:
    - shard-glk:          FAIL [fdo#103166] -> PASS

  * igt@kms_setmode@basic:
    - shard-kbl:          FAIL [fdo#99912] -> PASS

  * igt@perf_pmu@rc6-runtime-pm:
    - shard-apl:          FAIL [fdo#105010] -> PASS
    - shard-kbl:          FAIL [fdo#105010] -> PASS

  
#### Warnings ####

  * igt@kms_cursor_crc@cursor-64x64-suspend:
    - shard-skl:          FAIL [fdo#103191] / [fdo#103232] -> INCOMPLETE [fdo#104108]

  * igt@runner@aborted:
    - shard-glk:          FAIL [fdo#109373] / [k.org#202321] -> ( 2 FAIL ) [fdo#109373] / [k.org#202321]

  
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#104008]: https://bugs.freedesktop.org/show_bug.cgi?id=104008
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#104782]: https://bugs.freedesktop.org/show_bug.cgi?id=104782
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#106885]: https://bugs.freedesktop.org/show_bug.cgi?id=106885
  [fdo#107201]: https://bugs.freedesktop.org/show_bug.cgi?id=107201
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108901]: https://bugs.freedesktop.org/show_bug.cgi?id=108901
  [fdo#109016]: https://bugs.freedesktop.org/show_bug.cgi?id=109016
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109373]: https://bugs.freedesktop.org/show_bug.cgi?id=109373
  [fdo#109559]: https://bugs.freedesktop.org/show_bug.cgi?id=109559
  [fdo#109660]: https://bugs.freedesktop.org/show_bug.cgi?id=109660
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#202321]: https://bugzilla.kernel.org/show_bug.cgi?id=202321


Participating hosts (6 -> 6)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5720 -> Patchwork_12415

  CI_DRM_5720: 3539e9cecf9d8d32efd335f9e8656a547af9609f @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4876: 51b8d4cfde8d5b0176180b9683accea91474c7ff @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12415: e3a875da4f9f3f78842bc09dda0a781abb3d149c @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12415/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: ✗ Fi.CI.IGT: failure for series starting with [1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08 12:33 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-03-08 12:35   ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2019-03-08 12:35 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-03-08 12:33:01)
> #### Possible regressions ####
> 
>   * igt@gem_eio@in-flight-suspend:
>     - shard-kbl:          PASS -> DMESG-WARN
>     - shard-skl:          PASS -> DMESG-WARN
>     - shard-apl:          PASS -> DMESG-WARN
>     - shard-snb:          PASS -> DMESG-WARN
>     - shard-glk:          PASS -> DMESG-WARN
>     - shard-hsw:          PASS -> DMESG-WARN

Whoops, why yes, we do expect to hit EIO in this case.

Hmm,
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
                   ` (6 preceding siblings ...)
  2019-03-08 12:33 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-03-08 12:44 ` Chris Wilson
  2019-03-08 12:46   ` Chris Wilson
  7 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2019-03-08 12:44 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-03-08 09:36:54)
> +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
> +{
> +       bool result = true;
> +
> +       /*
> +        * Even if we fail to switch, give whatever is running a small chance
> +        * to save itself before we report the failure. Yes, this may be a
> +        * false positive due to e.g. ENOMEM, caveat emptor!
> +        */
> +       if (i915_gem_switch_to_kernel_context(i915))
> +               result = false;
> +
> +       if (i915_gem_wait_for_idle(i915,
> +                                  I915_WAIT_LOCKED |
> +                                  I915_WAIT_FOR_IDLE_BOOST,
> +                                  I915_GEM_IDLE_TIMEOUT))
> +               result = false;
> +
> +       if (result) {
> +               assert_kernel_context_is_current(i915);
> +       } else {

	if (intel_has_gpu_reset()) {

> +               /* Forcibly cancel outstanding work and leave the gpu quiet. */
> +               dev_err(i915->drm.dev,
> +                       "Failed to idle engines, declaring wedged!\n");
> +               GEM_TRACE_DUMP();

	}

To prevent it spewing just for gem_eio???

I think the error message is important, but gem_eio is a bit of an arm
twister.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08 12:44 ` [PATCH 1/4] " Chris Wilson
@ 2019-03-08 12:46   ` Chris Wilson
  2019-03-08 12:50     ` Tvrtko Ursulin
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2019-03-08 12:46 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-03-08 12:44:16)
> Quoting Chris Wilson (2019-03-08 09:36:54)
> > +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
> > +{
> > +       bool result = true;
> > +
> > +       /*
> > +        * Even if we fail to switch, give whatever is running a small chance
> > +        * to save itself before we report the failure. Yes, this may be a
> > +        * false positive due to e.g. ENOMEM, caveat emptor!
> > +        */
> > +       if (i915_gem_switch_to_kernel_context(i915))
> > +               result = false;
> > +
> > +       if (i915_gem_wait_for_idle(i915,
> > +                                  I915_WAIT_LOCKED |
> > +                                  I915_WAIT_FOR_IDLE_BOOST,
> > +                                  I915_GEM_IDLE_TIMEOUT))
> > +               result = false;
> > +
> > +       if (result) {
> > +               assert_kernel_context_is_current(i915);
> > +       } else {
> 
>         if (intel_has_gpu_reset()) {
> 
> > +               /* Forcibly cancel outstanding work and leave the gpu quiet. */
> > +               dev_err(i915->drm.dev,
> > +                       "Failed to idle engines, declaring wedged!\n");
> > +               GEM_TRACE_DUMP();
> 
>         }
> 
> To prevent it spewing just for gem_eio???
> 
> I think the error message is important, but gem_eio is a bit of an arm
> twister.

On the other hand, gem_eio does show that this can be user invoked so is
anything above a dev_notice() required?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling
  2019-03-08 12:46   ` Chris Wilson
@ 2019-03-08 12:50     ` Tvrtko Ursulin
  0 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2019-03-08 12:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 08/03/2019 12:46, Chris Wilson wrote:
> Quoting Chris Wilson (2019-03-08 12:44:16)
>> Quoting Chris Wilson (2019-03-08 09:36:54)
>>> +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
>>> +{
>>> +       bool result = true;
>>> +
>>> +       /*
>>> +        * Even if we fail to switch, give whatever is running a small chance
>>> +        * to save itself before we report the failure. Yes, this may be a
>>> +        * false positive due to e.g. ENOMEM, caveat emptor!
>>> +        */
>>> +       if (i915_gem_switch_to_kernel_context(i915))
>>> +               result = false;
>>> +
>>> +       if (i915_gem_wait_for_idle(i915,
>>> +                                  I915_WAIT_LOCKED |
>>> +                                  I915_WAIT_FOR_IDLE_BOOST,
>>> +                                  I915_GEM_IDLE_TIMEOUT))
>>> +               result = false;
>>> +
>>> +       if (result) {
>>> +               assert_kernel_context_is_current(i915);
>>> +       } else {
>>
>>          if (intel_has_gpu_reset()) {
>>
>>> +               /* Forcibly cancel outstanding work and leave the gpu quiet. */
>>> +               dev_err(i915->drm.dev,
>>> +                       "Failed to idle engines, declaring wedged!\n");
>>> +               GEM_TRACE_DUMP();
>>
>>          }
>>
>> To prevent it spewing just for gem_eio???
>>
>> I think the error message is important, but gem_eio is a bit of an arm
>> twister.
> 
> On the other hand, gem_eio does show that this can be user invoked so is
> anything above a dev_notice() required?

Varying log level depending on intel_has_gpu_reset sounds like the answer.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-03-08 12:50 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-08  9:36 [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
2019-03-08  9:36 ` [PATCH 2/4] drm/i915: Refactor common code to load initial power context Chris Wilson
2019-03-08  9:36 ` [PATCH 3/4] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
2019-03-08  9:36 ` [PATCH 4/4] drm/i915: Remove has-kernel-context Chris Wilson
2019-03-08  9:50 ` [PATCH 1/4] drm/i915: Do a synchronous switch-to-kernel-context on idling Tvrtko Ursulin
2019-03-08 10:22 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/4] " Patchwork
2019-03-08 10:56 ` ✓ Fi.CI.BAT: success " Patchwork
2019-03-08 12:33 ` ✗ Fi.CI.IGT: failure " Patchwork
2019-03-08 12:35   ` Chris Wilson
2019-03-08 12:44 ` [PATCH 1/4] " Chris Wilson
2019-03-08 12:46   ` Chris Wilson
2019-03-08 12:50     ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.