* [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
@ 2018-03-20  0:18 Chris Wilson
  2018-03-20  0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
                   ` (8 more replies)
  0 siblings, 9 replies; 19+ messages in thread
From: Chris Wilson @ 2018-03-20  0:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Not all callers want the GPU error to be handled in the same way, so
expose a control parameter. In the first instance, some callers do not
want the heavyweight error capture, so add a bit to request the state
to be captured and saved.

v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
reason for the reset in the dev_notice(), superseding the earlier option
to not print that notice.
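
As a sketch of the two resulting call shapes (hypothetical call sites;
the real conversions are in the diff below):

	/* Heavyweight: capture the GPU state before resetting, and
	 * report the formatted reason via dev_notice().
	 */
	i915_handle_error(i915, engine_mask, I915_ERROR_CAPTURE,
			  "hang on %s", engine->name);

	/* Lightweight: no error capture, and a NULL fmt propagates a
	 * NULL msg so i915_reset()/i915_reset_engine() stay silent,
	 * replacing the old I915_RESET_QUIET bit.
	 */
	i915_handle_error(i915, engine_mask, 0, NULL);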

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c              |  4 +--
 drivers/gpu/drm/i915/i915_drv.c                  | 17 +++++------
 drivers/gpu/drm/i915/i915_drv.h                  | 10 +++---
 drivers/gpu/drm/i915/i915_irq.c                  | 39 +++++++++++++-----------
 drivers/gpu/drm/i915/intel_hangcheck.c           | 13 ++++----
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
 6 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 964ea1a12357..7816cd53100a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4011,8 +4011,8 @@ i915_wedged_set(void *data, u64 val)
 		engine->hangcheck.stalled = true;
 	}
 
-	i915_handle_error(i915, val, "Manually set wedged engine mask = %llx",
-			  val);
+	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
+			  "Manually set wedged engine mask = %llx", val);
 
 	wait_on_bit(&i915->gpu_error.flags,
 		    I915_RESET_HANDOFF,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1021bf40e236..204fa07e8f79 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1869,7 +1869,7 @@ static int i915_resume_switcheroo(struct drm_device *dev)
 /**
  * i915_reset - reset chip after a hang
  * @i915: #drm_i915_private to reset
- * @flags: Instructions
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
  *
  * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
  * on failure.
@@ -1884,7 +1884,7 @@ static int i915_resume_switcheroo(struct drm_device *dev)
  *   - re-init interrupt state
  *   - re-init display
  */
-void i915_reset(struct drm_i915_private *i915, unsigned int flags)
+void i915_reset(struct drm_i915_private *i915, const char *msg)
 {
 	struct i915_gpu_error *error = &i915->gpu_error;
 	int ret;
@@ -1901,8 +1901,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
 	if (!i915_gem_unset_wedged(i915))
 		goto wakeup;
 
-	if (!(flags & I915_RESET_QUIET))
-		dev_notice(i915->drm.dev, "Resetting chip after gpu hang\n");
+	if (msg)
+		dev_notice(i915->drm.dev, "Resetting chip for %s\n", msg);
 	error->reset_count++;
 
 	disable_irq(i915->drm.irq);
@@ -2003,7 +2003,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
 /**
  * i915_reset_engine - reset GPU engine to recover from a hang
  * @engine: engine to reset
- * @flags: options
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
  *
  * Reset a specific GPU engine. Useful if a hang is detected.
  * Returns zero on successful reset or otherwise an error code.
@@ -2013,7 +2013,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
  *  - reset engine (which will force the engine to idle)
  *  - re-init/configure engine
  */
-int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
+int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 {
 	struct i915_gpu_error *error = &engine->i915->gpu_error;
 	struct i915_request *active_request;
@@ -2028,10 +2028,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
 		goto out;
 	}
 
-	if (!(flags & I915_RESET_QUIET)) {
+	if (msg)
 		dev_notice(engine->i915->drm.dev,
-			   "Resetting %s after gpu hang\n", engine->name);
-	}
+			   "Resetting %s for %s\n", engine->name, msg);
 	error->reset_engine_count[engine->id]++;
 
 	if (!engine->i915->guc.execbuf_client)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e27ba8fb64e6..29ef6c16bbe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2700,10 +2700,8 @@ extern void i915_driver_unload(struct drm_device *dev);
 extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
 extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
 
-#define I915_RESET_QUIET BIT(0)
-extern void i915_reset(struct drm_i915_private *i915, unsigned int flags);
-extern int i915_reset_engine(struct intel_engine_cs *engine,
-			     unsigned int flags);
+extern void i915_reset(struct drm_i915_private *i915, const char *msg);
+extern int i915_reset_engine(struct intel_engine_cs *engine, const char *msg);
 
 extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
 extern int intel_reset_guc(struct drm_i915_private *dev_priv);
@@ -2751,10 +2749,12 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 			   &dev_priv->gpu_error.hangcheck_work, delay);
 }
 
-__printf(3, 4)
+__printf(4, 5)
 void i915_handle_error(struct drm_i915_private *dev_priv,
 		       u32 engine_mask,
+		       unsigned long flags,
 		       const char *fmt, ...);
+#define I915_ERROR_CAPTURE BIT(0)
 
 extern void intel_irq_init(struct drm_i915_private *dev_priv);
 extern void intel_irq_fini(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 44eef355e12c..dbdb11ec38f6 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2877,14 +2877,8 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
-/**
- * i915_reset_device - do process context error handling work
- * @dev_priv: i915 device private
- *
- * Fire an error uevent so userspace can see that a hang or error
- * was detected.
- */
-static void i915_reset_device(struct drm_i915_private *dev_priv)
+static void i915_reset_device(struct drm_i915_private *dev_priv,
+			      const char *msg)
 {
 	struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
@@ -2910,7 +2904,7 @@ static void i915_reset_device(struct drm_i915_private *dev_priv)
 		 */
 		do {
 			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-				i915_reset(dev_priv, 0);
+				i915_reset(dev_priv, msg);
 				mutex_unlock(&dev_priv->drm.struct_mutex);
 			}
 		} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
@@ -2955,6 +2949,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
  * i915_handle_error - handle a gpu error
  * @dev_priv: i915 device private
  * @engine_mask: mask representing engines that are hung
+ * @flags: control flags
  * @fmt: Error message format string
  *
  * Do some basic checking of register state at error time and
@@ -2965,16 +2960,23 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
  */
 void i915_handle_error(struct drm_i915_private *dev_priv,
 		       u32 engine_mask,
+		       unsigned long flags,
 		       const char *fmt, ...)
 {
 	struct intel_engine_cs *engine;
 	unsigned int tmp;
-	va_list args;
 	char error_msg[80];
+	char *msg = NULL;
+
+	if (fmt) {
+		va_list args;
 
-	va_start(args, fmt);
-	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
-	va_end(args);
+		va_start(args, fmt);
+		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+		va_end(args);
+
+		msg = error_msg;
+	}
 
 	/*
 	 * In most cases it's guaranteed that we get here with an RPM
@@ -2986,8 +2988,11 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	intel_runtime_pm_get(dev_priv);
 
 	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
-	i915_capture_error_state(dev_priv, engine_mask, error_msg);
-	i915_clear_error_registers(dev_priv);
+
+	if (flags & I915_ERROR_CAPTURE) {
+		i915_capture_error_state(dev_priv, engine_mask, msg);
+		i915_clear_error_registers(dev_priv);
+	}
 
 	/*
 	 * Try engine reset when available. We fall back to full reset if
@@ -3000,7 +3005,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 					     &dev_priv->gpu_error.flags))
 				continue;
 
-			if (i915_reset_engine(engine, 0) == 0)
+			if (i915_reset_engine(engine, msg) == 0)
 				engine_mask &= ~intel_engine_flag(engine);
 
 			clear_bit(I915_RESET_ENGINE + engine->id,
@@ -3030,7 +3035,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 				    TASK_UNINTERRUPTIBLE);
 	}
 
-	i915_reset_device(dev_priv);
+	i915_reset_device(dev_priv, msg);
 
 	for_each_engine(engine, dev_priv, tmp) {
 		clear_bit(I915_RESET_ENGINE + engine->id,
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 42e45ae87393..fd0ffb8328d0 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 	 */
 	tmp = I915_READ_CTL(engine);
 	if (tmp & RING_WAIT) {
-		i915_handle_error(dev_priv, 0,
-				  "Kicking stuck wait on %s",
-				  engine->name);
+		i915_handle_error(dev_priv, BIT(engine->id), 0,
+				  "stuck wait on %s", engine->name);
 		I915_WRITE_CTL(engine, tmp);
 		return ENGINE_WAIT_KICK;
 	}
@@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 		default:
 			return ENGINE_DEAD;
 		case 1:
-			i915_handle_error(dev_priv, 0,
-					  "Kicking stuck semaphore on %s",
+			i915_handle_error(dev_priv, ALL_ENGINES, 0,
+					  "stuck semaphore on %s",
 					  engine->name);
 			I915_WRITE_CTL(engine, tmp);
 			return ENGINE_WAIT_KICK;
@@ -386,13 +385,13 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
 	if (stuck != hung)
 		hung &= ~stuck;
 	len = scnprintf(msg, sizeof(msg),
-			"%s on ", stuck == hung ? "No progress" : "Hang");
+			"%s on ", stuck == hung ? "no progress" : "hang");
 	for_each_engine_masked(engine, i915, hung, tmp)
 		len += scnprintf(msg + len, sizeof(msg) - len,
 				 "%s, ", engine->name);
 	msg[len-2] = '\0';
 
-	return i915_handle_error(i915, hung, "%s", msg);
+	return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index df7898c8edcb..12682d985b9f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -433,7 +433,7 @@ static int igt_global_reset(void *arg)
 	mutex_lock(&i915->drm.struct_mutex);
 	reset_count = i915_reset_count(&i915->gpu_error);
 
-	i915_reset(i915, I915_RESET_QUIET);
+	i915_reset(i915, NULL);
 
 	if (i915_reset_count(&i915->gpu_error) == reset_count) {
 		pr_err("No GPU reset recorded!\n");
@@ -518,7 +518,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 			engine->hangcheck.seqno =
 				intel_engine_get_seqno(engine);
 
-			err = i915_reset_engine(engine, I915_RESET_QUIET);
+			err = i915_reset_engine(engine, NULL);
 			if (err) {
 				pr_err("i915_reset_engine failed\n");
 				break;
@@ -725,7 +725,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
 			engine->hangcheck.seqno =
 				intel_engine_get_seqno(engine);
 
-			err = i915_reset_engine(engine, I915_RESET_QUIET);
+			err = i915_reset_engine(engine, NULL);
 			if (err) {
 				pr_err("i915_reset_engine(%s:%s) failed, err=%d\n",
 				       engine->name, active ? "active" : "idle", err);
@@ -865,7 +865,6 @@ static int igt_wait_reset(void *arg)
 		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
 		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
 
-		i915_reset(i915, 0);
 		i915_gem_set_wedged(i915);
 
 		err = -EIO;
@@ -962,7 +961,6 @@ static int igt_reset_queue(void *arg)
 				i915_request_put(rq);
 				i915_request_put(prev);
 
-				i915_reset(i915, 0);
 				i915_gem_set_wedged(i915);
 
 				err = -EIO;
@@ -971,7 +969,7 @@ static int igt_reset_queue(void *arg)
 
 			reset_count = fake_hangcheck(prev);
 
-			i915_reset(i915, I915_RESET_QUIET);
+			i915_reset(i915, NULL);
 
 			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
 					    &i915->gpu_error.flags));
@@ -1069,7 +1067,6 @@ static int igt_handle_error(void *arg)
 		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
 		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
 
-		i915_reset(i915, 0);
 		i915_gem_set_wedged(i915);
 
 		err = -EIO;
@@ -1084,7 +1081,7 @@ static int igt_handle_error(void *arg)
 	engine->hangcheck.stalled = true;
 	engine->hangcheck.seqno = intel_engine_get_seqno(engine);
 
-	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);
+	i915_handle_error(i915, intel_engine_flag(engine), 0, NULL);
 
 	xchg(&i915->gpu_error.first_error, error);
 
-- 
2.16.2


* [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context()
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
@ 2018-03-20  0:18 ` Chris Wilson
  2018-03-22 15:26   ` Jeff McGee
  2018-03-20  0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20  0:18 UTC (permalink / raw)
  To: intel-gfx

As a complement to inject_preempt_context(), follow up with the function
to handle its completion. This will be useful should we wish to extend
the duties of the preempt-context for execlists.
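
In outline, the pairing this creates (a sketch of the flow only; both
halves are in the diff below):

	inject_preempt_context(engine);
	/*
	 * ... the HW switches to the preempt-to-idle context; the CSB
	 * eventually reports COMPLETE with preempt_complete_status ...
	 */
	complete_preempt_context(execlists);
	/* cancel the ELSP ports, unwind incomplete requests and clear
	 * EXECLISTS_ACTIVE_PREEMPT
	 */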

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 53f1c009ed7b..0bfaeb56b8c7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -531,8 +531,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	if (execlists->ctrl_reg)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
 
-	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
-	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
+static void complete_preempt_context(struct intel_engine_execlists *execlists)
+{
+	execlists_cancel_port_requests(execlists);
+	execlists_unwind_incomplete_requests(execlists);
+
+	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -939,14 +948,7 @@ static void execlists_submission_tasklet(unsigned long data)
 			if (status & GEN8_CTX_STATUS_COMPLETE &&
 			    buf[2*head + 1] == execlists->preempt_complete_status) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
-
-				execlists_cancel_port_requests(execlists);
-				execlists_unwind_incomplete_requests(execlists);
-
-				GEM_BUG_ON(!execlists_is_active(execlists,
-								EXECLISTS_ACTIVE_PREEMPT));
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_PREEMPT);
+				complete_preempt_context(execlists);
 				continue;
 			}
 
-- 
2.16.2


* [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
  2018-03-20  0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
@ 2018-03-20  0:18 ` Chris Wilson
  2018-03-22 15:28   ` Jeff McGee
  2018-03-20  0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset preparations Chris Wilson
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20  0:18 UTC (permalink / raw)
  To: intel-gfx

In preparation for more careful handling of incomplete preemption
during reset by execlists, move the existing code wholesale to the
backends under a couple of new reset vfuncs.
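
Roughly, the common reset path then drives a backend through the new
vfuncs like so (a simplified sketch of the i915_gem_reset_* flow in
this patch, with error handling elided):

	struct i915_request *rq;

	rq = engine->reset.prepare(engine);	/* quiesce, find the hung rq */
	...
	engine->reset.reset(engine, rq);	/* set up CS to resume from
						 * the breadcrumb */
	...
	engine->reset.finish(engine);		/* re-enable submission */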

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         | 42 ++++----------------------
 drivers/gpu/drm/i915/intel_lrc.c        | 52 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  9 ++++--
 4 files changed, 78 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 802df8e1a544..38f7160d99c9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2917,7 +2917,7 @@ static bool engine_stalled(struct intel_engine_cs *engine)
 struct i915_request *
 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 {
-	struct i915_request *request = NULL;
+	struct i915_request *request;
 
 	/*
 	 * During the reset sequence, we must prevent the engine from
@@ -2940,40 +2940,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	 */
 	kthread_park(engine->breadcrumbs.signaler);
 
-	/*
-	 * Prevent request submission to the hardware until we have
-	 * completed the reset in i915_gem_reset_finish(). If a request
-	 * is completed by one engine, it may then queue a request
-	 * to a second via its execlists->tasklet *just* as we are
-	 * calling engine->init_hw() and also writing the ELSP.
-	 * Turning off the execlists->tasklet until the reset is over
-	 * prevents the race.
-	 *
-	 * Note that this needs to be a single atomic operation on the
-	 * tasklet (flush existing tasks, prevent new tasks) to prevent
-	 * a race between reset and set-wedged. It is not, so we do the best
-	 * we can atm and make sure we don't lock the machine up in the more
-	 * common case of recursively being called from set-wedged from inside
-	 * i915_reset.
-	 */
-	if (!atomic_read(&engine->execlists.tasklet.count))
-		tasklet_kill(&engine->execlists.tasklet);
-	tasklet_disable(&engine->execlists.tasklet);
-
-	/*
-	 * We're using worker to queue preemption requests from the tasklet in
-	 * GuC submission mode.
-	 * Even though tasklet was disabled, we may still have a worker queued.
-	 * Let's make sure that all workers scheduled before disabling the
-	 * tasklet are completed before continuing with the reset.
-	 */
-	if (engine->i915->guc.preempt_wq)
-		flush_workqueue(engine->i915->guc.preempt_wq);
-
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
-
-	request = i915_gem_find_active_request(engine);
+	request = engine->reset.prepare(engine);
 	if (request && request->fence.error == -EIO)
 		request = ERR_PTR(-EIO); /* Previous reset failed! */
 
@@ -3120,7 +3087,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
 	}
 
 	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset_hw(engine, request);
+	engine->reset.reset(engine, request);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
@@ -3172,7 +3139,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
 {
-	tasklet_enable(&engine->execlists.tasklet);
+	engine->reset.finish(engine);
+
 	kthread_unpark(engine->breadcrumbs.signaler);
 
 	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0bfaeb56b8c7..f662a9524233 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1663,6 +1663,44 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
+static struct i915_request *
+execlists_reset_prepare(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	/*
+	 * Prevent request submission to the hardware until we have
+	 * completed the reset in i915_gem_reset_finish(). If a request
+	 * is completed by one engine, it may then queue a request
+	 * to a second via its execlists->tasklet *just* as we are
+	 * calling engine->init_hw() and also writing the ELSP.
+	 * Turning off the execlists->tasklet until the reset is over
+	 * prevents the race.
+	 *
+	 * Note that this needs to be a single atomic operation on the
+	 * tasklet (flush existing tasks, prevent new tasks) to prevent
+	 * a race between reset and set-wedged. It is not, so we do the best
+	 * we can atm and make sure we don't lock the machine up in the more
+	 * common case of recursively being called from set-wedged from inside
+	 * i915_reset.
+	 */
+	if (!atomic_read(&execlists->tasklet.count))
+		tasklet_kill(&execlists->tasklet);
+	tasklet_disable(&execlists->tasklet);
+
+	/*
+	 * We're using worker to queue preemption requests from the tasklet in
+	 * GuC submission mode.
+	 * Even though tasklet was disabled, we may still have a worker queued.
+	 * Let's make sure that all workers scheduled before disabling the
+	 * tasklet are completed before continuing with the reset.
+	 */
+	if (engine->i915->guc.preempt_wq)
+		flush_workqueue(engine->i915->guc.preempt_wq);
+
+	return i915_gem_find_active_request(engine);
+}
+
 static void reset_irq(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1692,8 +1730,8 @@ static void reset_irq(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
-static void reset_common_ring(struct intel_engine_cs *engine,
-			      struct i915_request *request)
+static void execlists_reset(struct intel_engine_cs *engine,
+			    struct i915_request *request)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct intel_context *ce;
@@ -1766,6 +1804,11 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	unwind_wa_tail(request);
 }
 
+static void execlists_reset_finish(struct intel_engine_cs *engine)
+{
+	tasklet_enable(&engine->execlists.tasklet);
+}
+
 static int intel_logical_ring_emit_pdps(struct i915_request *rq)
 {
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
@@ -2090,7 +2133,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
-	engine->reset_hw = reset_common_ring;
+
+	engine->reset.prepare = execlists_reset_prepare;
+	engine->reset.reset = execlists_reset;
+	engine->reset.finish = execlists_reset_finish;
 
 	engine->context_pin = execlists_context_pin;
 	engine->context_unpin = execlists_context_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 04d9d9a946a7..eebcc877ef60 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -530,8 +530,16 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static void reset_ring_common(struct intel_engine_cs *engine,
-			      struct i915_request *request)
+static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+{
+	if (engine->irq_seqno_barrier)
+		engine->irq_seqno_barrier(engine);
+
+	return i915_gem_find_active_request(engine);
+}
+
+static void reset_ring(struct intel_engine_cs *engine,
+		       struct i915_request *request)
 {
 	/*
 	 * RC6 must be prevented until the reset is complete and the engine
@@ -595,6 +603,10 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 	}
 }
 
+static void reset_finish(struct intel_engine_cs *engine)
+{
+}
+
 static int intel_rcs_ctx_init(struct i915_request *rq)
 {
 	int ret;
@@ -1987,7 +1999,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	intel_ring_init_semaphores(dev_priv, engine);
 
 	engine->init_hw = init_ring_common;
-	engine->reset_hw = reset_ring_common;
+	engine->reset.prepare = reset_prepare;
+	engine->reset.reset = reset_ring;
+	engine->reset.finish = reset_finish;
 
 	engine->context_pin = intel_ring_context_pin;
 	engine->context_unpin = intel_ring_context_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1f50727a5ddb..e2681303ce21 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -418,8 +418,13 @@ struct intel_engine_cs {
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
 	int		(*init_hw)(struct intel_engine_cs *engine);
-	void		(*reset_hw)(struct intel_engine_cs *engine,
-				    struct i915_request *rq);
+
+	struct {
+		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
+		void (*reset)(struct intel_engine_cs *engine,
+			      struct i915_request *rq);
+		void (*finish)(struct intel_engine_cs *engine);
+	} reset;
 
 	void		(*park)(struct intel_engine_cs *engine);
 	void		(*unpark)(struct intel_engine_cs *engine);
-- 
2.16.2


* [PATCH 4/5] drm/i915: Split execlists/guc reset preparations
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
  2018-03-20  0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
  2018-03-20  0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
@ 2018-03-20  0:18 ` Chris Wilson
  2018-03-22 15:28   ` Jeff McGee
  2018-03-20  0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20  0:18 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will make the execlists reset prepare callback
take into account preemption by flushing the context-switch handler.
This is not applicable to the GuC submission backend, so split the two
into their own backend callbacks.
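
After the split, each backend installs its own prepare hook (both
assignments are in the diff below); only the GuC variant retains the
preempt_wq flush:

	/* execlists: quiesce the tasklet only */
	engine->reset.prepare = execlists_reset_prepare;

	/* GuC: also flush guc.preempt_wq before proceeding */
	engine->reset.prepare = guc_reset_prepare;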

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_submission.c | 39 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c            | 11 +-------
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 207cda062626..779c8d3156e5 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -776,6 +776,44 @@ static void guc_submission_tasklet(unsigned long data)
 		guc_dequeue(engine);
 }
 
+static struct i915_request *
+guc_reset_prepare(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	/*
+	 * Prevent request submission to the hardware until we have
+	 * completed the reset in i915_gem_reset_finish(). If a request
+	 * is completed by one engine, it may then queue a request
+	 * to a second via its execlists->tasklet *just* as we are
+	 * calling engine->init_hw() and also writing the ELSP.
+	 * Turning off the execlists->tasklet until the reset is over
+	 * prevents the race.
+	 *
+	 * Note that this needs to be a single atomic operation on the
+	 * tasklet (flush existing tasks, prevent new tasks) to prevent
+	 * a race between reset and set-wedged. It is not, so we do the best
+	 * we can atm and make sure we don't lock the machine up in the more
+	 * common case of recursively being called from set-wedged from inside
+	 * i915_reset.
+	 */
+	if (!atomic_read(&execlists->tasklet.count))
+		tasklet_kill(&execlists->tasklet);
+	tasklet_disable(&execlists->tasklet);
+
+	/*
+	 * We're using worker to queue preemption requests from the tasklet in
+	 * GuC submission mode.
+	 * Even though tasklet was disabled, we may still have a worker queued.
+	 * Let's make sure that all workers scheduled before disabling the
+	 * tasklet are completed before continuing with the reset.
+	 */
+	if (engine->i915->guc.preempt_wq)
+		flush_workqueue(engine->i915->guc.preempt_wq);
+
+	return i915_gem_find_active_request(engine);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -1235,6 +1273,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 			&engine->execlists;
 
 		execlists->tasklet.func = guc_submission_tasklet;
+		engine->reset.prepare = guc_reset_prepare;
 		engine->park = guc_submission_park;
 		engine->unpark = guc_submission_unpark;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f662a9524233..e5a3ffbc273a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1688,16 +1688,6 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 		tasklet_kill(&execlists->tasklet);
 	tasklet_disable(&execlists->tasklet);
 
-	/*
-	 * We're using worker to queue preemption requests from the tasklet in
-	 * GuC submission mode.
-	 * Even though tasklet was disabled, we may still have a worker queued.
-	 * Let's make sure that all workers scheduled before disabling the
-	 * tasklet are completed before continuing with the reset.
-	 */
-	if (engine->i915->guc.preempt_wq)
-		flush_workqueue(engine->i915->guc.preempt_wq);
-
 	return i915_gem_find_active_request(engine);
 }
 
@@ -2115,6 +2105,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->cancel_requests = execlists_cancel_requests;
 	engine->schedule = execlists_schedule;
 	engine->execlists.tasklet.func = execlists_submission_tasklet;
+	engine->reset.prepare = execlists_reset_prepare;
 
 	engine->park = NULL;
 	engine->unpark = NULL;
-- 
2.16.2


* [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
                   ` (2 preceding siblings ...)
  2018-03-20  0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset preparations Chris Wilson
@ 2018-03-20  0:18 ` Chris Wilson
  2018-03-21  0:17   ` Jeff McGee
  2018-03-20  0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20  0:18 UTC (permalink / raw)
  To: intel-gfx

Catch up with the in-flight CSB events after disabling the tasklet,
before deciding which request was truly guilty of hanging the GPU.
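
The essence of the reworked execlists_reset_prepare() (a simplified
sketch; timeline locking, the NULL-port check and the mmio CSB fallback
are elided, see the diff for the real thing):

	struct i915_request *request, *active = NULL;

	tasklet_disable(&execlists->tasklet);
	process_csb(engine);	/* drain pending context-switch events */

	/* The last active request can be no later than ELSP[0], so walk
	 * the timeline backwards from there; anything that has already
	 * completed is pardoned.
	 */
	request = port_request(execlists->port);
	list_for_each_entry_from_reverse(request,
					 &engine->timeline->requests, link) {
		if (__i915_request_completed(request, request->global_seqno))
			break;
		active = request;
	}
	return active;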

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
 1 file changed, 197 insertions(+), 158 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e5a3ffbc273a..beb81f13a3cc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	local_irq_restore(flags);
 }
 
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void process_csb(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port * const port = execlists->port;
-	struct drm_i915_private *dev_priv = engine->i915;
+	struct drm_i915_private *i915 = engine->i915;
+	unsigned int head, tail;
+	const u32 *buf;
 	bool fw = false;
 
-	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
-	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
-	 * not be relinquished until the device is idle (see
-	 * i915_gem_idle_work_handler()). As a precaution, we make sure
-	 * that all ELSP are drained i.e. we have processed the CSB,
-	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
-	 */
-	GEM_BUG_ON(!dev_priv->gt.awake);
+	if (unlikely(execlists->csb_use_mmio)) {
+		buf = (u32 * __force)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+		execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+	} else {
+		/* The HWSP contains a (cacheable) mirror of the CSB */
+		buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+	}
 
-	/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
-	 * imposing the cost of a locked atomic transaction when submitting a
-	 * new request (outside of the context-switch interrupt).
+	/*
+	 * The write will be ordered by the uncached read (itself
+	 * a memory barrier), so we do not need another in the form
+	 * of a locked instruction. The race between the interrupt
+	 * handler and the split test/clear is harmless as we order
+	 * our clear before the CSB read. If the interrupt arrived
+	 * first between the test and the clear, we read the updated
+	 * CSB and clear the bit. If the interrupt arrives as we read
+	 * the CSB or later (i.e. after we had cleared the bit) the bit
+	 * is set and we do a new loop.
 	 */
-	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
-		/* The HWSP contains a (cacheable) mirror of the CSB */
-		const u32 *buf =
-			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-		unsigned int head, tail;
-
-		if (unlikely(execlists->csb_use_mmio)) {
-			buf = (u32 * __force)
-				(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-			execlists->csb_head = -1; /* force mmio read of CSB ptrs */
-		}
+	__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+		intel_uncore_forcewake_get(i915, execlists->fw_domains);
+		fw = true;
+
+		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		execlists->csb_head = head;
+	} else {
+		const int write_idx =
+			intel_hws_csb_write_index(i915) -
+			I915_HWS_CSB_BUF0_INDEX;
+
+		head = execlists->csb_head;
+		tail = READ_ONCE(buf[write_idx]);
+	}
+	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+		  engine->name,
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+
+	while (head != tail) {
+		struct i915_request *rq;
+		unsigned int status;
+		unsigned int count;
+
+		if (++head == GEN8_CSB_ENTRIES)
+			head = 0;
 
-		/* The write will be ordered by the uncached read (itself
-		 * a memory barrier), so we do not need another in the form
-		 * of a locked instruction. The race between the interrupt
-		 * handler and the split test/clear is harmless as we order
-		 * our clear before the CSB read. If the interrupt arrived
-		 * first between the test and the clear, we read the updated
-		 * CSB and clear the bit. If the interrupt arrives as we read
-		 * the CSB or later (i.e. after we had cleared the bit) the bit
-		 * is set and we do a new loop.
+		/*
+		 * We are flying near dragons again.
+		 *
+		 * We hold a reference to the request in execlist_port[]
+		 * but no more than that. We are operating in softirq
+		 * context and so cannot hold any mutex or sleep. That
+		 * prevents us stopping the requests we are processing
+		 * in port[] from being retired simultaneously (the
+		 * breadcrumb will be complete before we see the
+		 * context-switch). As we only hold the reference to the
+		 * request, any pointer chasing underneath the request
+		 * is subject to a potential use-after-free. Thus we
+		 * store all of the bookkeeping within port[] as
+		 * required, and avoid using unguarded pointers beneath
+		 * request itself. The same applies to the atomic
+		 * status notifier.
 		 */
-		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		if (unlikely(execlists->csb_head == -1)) { /* following a reset */
-			if (!fw) {
-				intel_uncore_forcewake_get(dev_priv,
-							   execlists->fw_domains);
-				fw = true;
-			}
 
-			head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-			tail = GEN8_CSB_WRITE_PTR(head);
-			head = GEN8_CSB_READ_PTR(head);
-			execlists->csb_head = head;
-		} else {
-			const int write_idx =
-				intel_hws_csb_write_index(dev_priv) -
-				I915_HWS_CSB_BUF0_INDEX;
+		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+			  engine->name, head,
+			  status, buf[2*head + 1],
+			  execlists->active);
+
+		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+			      GEN8_CTX_STATUS_PREEMPTED))
+			execlists_set_active(execlists,
+					     EXECLISTS_ACTIVE_HWACK);
+		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+			execlists_clear_active(execlists,
+					       EXECLISTS_ACTIVE_HWACK);
+
+		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+			continue;
 
-			head = execlists->csb_head;
-			tail = READ_ONCE(buf[write_idx]);
-		}
-		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-			  engine->name,
-			  head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-			  tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+		/* We should never get a COMPLETED | IDLE_ACTIVE! */
+		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-		while (head != tail) {
-			struct i915_request *rq;
-			unsigned int status;
-			unsigned int count;
+		if (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			GEM_TRACE("%s preempt-idle\n", engine->name);
+			complete_preempt_context(execlists);
+			continue;
+		}
 
-			if (++head == GEN8_CSB_ENTRIES)
-				head = 0;
+		if (status & GEN8_CTX_STATUS_PREEMPTED &&
+		    execlists_is_active(execlists,
+					EXECLISTS_ACTIVE_PREEMPT))
+			continue;
 
-			/* We are flying near dragons again.
-			 *
-			 * We hold a reference to the request in execlist_port[]
-			 * but no more than that. We are operating in softirq
-			 * context and so cannot hold any mutex or sleep. That
-			 * prevents us stopping the requests we are processing
-			 * in port[] from being retired simultaneously (the
-			 * breadcrumb will be complete before we see the
-			 * context-switch). As we only hold the reference to the
-			 * request, any pointer chasing underneath the request
-			 * is subject to a potential use-after-free. Thus we
-			 * store all of the bookkeeping within port[] as
-			 * required, and avoid using unguarded pointers beneath
-			 * request itself. The same applies to the atomic
-			 * status notifier.
-			 */
+		GEM_BUG_ON(!execlists_is_active(execlists,
+						EXECLISTS_ACTIVE_USER));
 
-			status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
-			GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
-				  engine->name, head,
-				  status, buf[2*head + 1],
-				  execlists->active);
-
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_HWACK);
-
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+		rq = port_unpack(port, &count);
+		GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+			  engine->name,
+			  port->context_id, count,
+			  rq ? rq->global_seqno : 0,
+			  rq ? rq_prio(rq) : 0);
+
+		/* Check the context/desc id for this event matches */
+		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+		GEM_BUG_ON(count == 0);
+		if (--count == 0) {
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+			GEM_BUG_ON(port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+			GEM_BUG_ON(!i915_request_completed(rq));
+			execlists_context_schedule_out(rq);
+			trace_i915_request_out(rq);
+			i915_request_put(rq);
+
+			GEM_TRACE("%s completed ctx=%d\n",
+				  engine->name, port->context_id);
+
+			execlists_port_complete(execlists, port);
+		} else {
+			port_set(port, port_pack(rq, count));
+		}
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		/* After the final element, the hw should be idle */
+		GEM_BUG_ON(port_count(port) == 0 &&
+			   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+		if (port_count(port) == 0)
+			execlists_clear_active(execlists,
+					       EXECLISTS_ACTIVE_USER);
+	}
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
-				GEM_TRACE("%s preempt-idle\n", engine->name);
-				complete_preempt_context(execlists);
-				continue;
-			}
+	if (head != execlists->csb_head) {
+		execlists->csb_head = head;
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+	}
 
-			if (status & GEN8_CTX_STATUS_PREEMPTED &&
-			    execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_PREEMPT))
-				continue;
+	if (unlikely(fw))
+		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+}
 
-			GEM_BUG_ON(!execlists_is_active(execlists,
-							EXECLISTS_ACTIVE_USER));
-
-			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
-				  engine->name,
-				  port->context_id, count,
-				  rq ? rq->global_seqno : 0,
-				  rq ? rq_prio(rq) : 0);
-
-			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
-			GEM_BUG_ON(count == 0);
-			if (--count == 0) {
-				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-				GEM_BUG_ON(!i915_request_completed(rq));
-				execlists_context_schedule_out(rq);
-				trace_i915_request_out(rq);
-				i915_request_put(rq);
-
-				GEM_TRACE("%s completed ctx=%d\n",
-					  engine->name, port->context_id);
-
-				execlists_port_complete(execlists, port);
-			} else {
-				port_set(port, port_pack(rq, count));
-			}
+/*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
-				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-			if (port_count(port) == 0)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_USER);
-		}
+	/*
+	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
+	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
+	 * not be relinquished until the device is idle (see
+	 * i915_gem_idle_work_handler()). As a precaution, we make sure
+	 * that all ELSP are drained i.e. we have processed the CSB,
+	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
+	 */
+	GEM_BUG_ON(!engine->i915->gt.awake);
 
-		if (head != execlists->csb_head) {
-			execlists->csb_head = head;
-			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-			       dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		}
-	}
+	/*
+	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
+	 * imposing the cost of a locked atomic transaction when submitting a
+	 * new request (outside of the context-switch interrupt).
+	 */
+	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+		process_csb(engine);
 
-	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
+	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
-
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1667,6 +1673,7 @@ static struct i915_request *
 execlists_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_request *request, *active;
 
 	/*
 	 * Prevent request submission to the hardware until we have
@@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 		tasklet_kill(&execlists->tasklet);
 	tasklet_disable(&execlists->tasklet);
 
-	return i915_gem_find_active_request(engine);
+	/*
+	 * We want to flush the pending context switches: having disabled
+	 * the tasklet above, we can assume exclusive access to the execlists.
+	 * This allows us to catch up with an in-flight preemption event,
+	 * and avoid blaming an innocent request if the stall was due to the
+	 * preemption itself.
+	 */
+	process_csb(engine);
+
+	/*
+	 * The last active request can then be no later than the last request
+	 * now in ELSP[0]. So search backwards from there, so that if the GPU
+	 * has advanced beyond the last CSB update, it will be pardoned.
+	 */
+	active = NULL;
+	request = port_request(execlists->port);
+	if (request) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&engine->timeline->lock, flags);
+		list_for_each_entry_from_reverse(request,
+						 &engine->timeline->requests,
+						 link) {
+			if (__i915_request_completed(request,
+						     request->global_seqno))
+				break;
+
+			active = request;
+		}
+		spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	}
+
+	return active;
 }
 
 static void reset_irq(struct intel_engine_cs *engine)
-- 
2.16.2


* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
                   ` (3 preceding siblings ...)
  2018-03-20  0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
@ 2018-03-20  0:39 ` Michel Thierry
  2018-03-20  0:44   ` Chris Wilson
  2018-03-20  1:24 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] " Patchwork
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 19+ messages in thread
From: Michel Thierry @ 2018-03-20  0:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala

On 3/19/2018 5:18 PM, Chris Wilson wrote:
> Not all callers want the GPU error to be handled in the same way, so
> expose a control parameter. In the first instance, some callers do not
> want the heavyweight error capture, so add a bit to request the state
> to be captured and saved.
> 
> v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
> reason for the reset in the dev_notice(), superseding the earlier option
> to not print that notice.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c              |  4 +--
>   drivers/gpu/drm/i915/i915_drv.c                  | 17 +++++------
>   drivers/gpu/drm/i915/i915_drv.h                  | 10 +++---
>   drivers/gpu/drm/i915/i915_irq.c                  | 39 +++++++++++++-----------
>   drivers/gpu/drm/i915/intel_hangcheck.c           | 13 ++++----
>   drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
>   6 files changed, 48 insertions(+), 48 deletions(-)
> 
...
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index 42e45ae87393..fd0ffb8328d0 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>   	 */
>   	tmp = I915_READ_CTL(engine);
>   	if (tmp & RING_WAIT) {
> -		i915_handle_error(dev_priv, 0,
> -				  "Kicking stuck wait on %s",
> -				  engine->name);
> +		i915_handle_error(dev_priv, BIT(engine->id), 0,
> +				  "stuck wait on %s", engine->name);
Before, we were not resetting anything here; is this change on purpose?
(If it is, it's worth adding it to the commit msg since it's changing
behavior.)

>   		I915_WRITE_CTL(engine, tmp);
>   		return ENGINE_WAIT_KICK;
>   	}
> @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>   		default:
>   			return ENGINE_DEAD;
>   		case 1:
> -			i915_handle_error(dev_priv, 0,
> -					  "Kicking stuck semaphore on %s",
> +			i915_handle_error(dev_priv, ALL_ENGINES, 0,
Same here,

> +					  "stuck semaphore on %s",
>   					  engine->name);
>   			I915_WRITE_CTL(engine, tmp);
>   			return ENGINE_WAIT_KICK;

Everything else looks OK to me.

-Michel

* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
@ 2018-03-20  0:44   ` Chris Wilson
  2018-03-20  0:56     ` Michel Thierry
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20  0:44 UTC (permalink / raw)
  To: Michel Thierry, intel-gfx; +Cc: Mika Kuoppala

Quoting Michel Thierry (2018-03-20 00:39:35)
> On 3/19/2018 5:18 PM, Chris Wilson wrote:
> > Not all callers want the GPU error to be handled in the same way, so
> > expose a control parameter. In the first instance, some callers do not
> > want the heavyweight error capture, so add a bit to request the state
> > to be captured and saved.
> > 
> > v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
> > reason for the reset in the dev_notice(), superseding the earlier option
> > to not print that notice.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Jeff McGee <jeff.mcgee@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Cc: Michel Thierry <michel.thierry@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c              |  4 +--
> >   drivers/gpu/drm/i915/i915_drv.c                  | 17 +++++------
> >   drivers/gpu/drm/i915/i915_drv.h                  | 10 +++---
> >   drivers/gpu/drm/i915/i915_irq.c                  | 39 +++++++++++++-----------
> >   drivers/gpu/drm/i915/intel_hangcheck.c           | 13 ++++----
> >   drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
> >   6 files changed, 48 insertions(+), 48 deletions(-)
> > 
> ...
> > diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> > index 42e45ae87393..fd0ffb8328d0 100644
> > --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> > +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> > @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> >        */
> >       tmp = I915_READ_CTL(engine);
> >       if (tmp & RING_WAIT) {
> > -             i915_handle_error(dev_priv, 0,
> > -                               "Kicking stuck wait on %s",
> > -                               engine->name);
> > +             i915_handle_error(dev_priv, BIT(engine->id), 0,
> > +                               "stuck wait on %s", engine->name);
> Before we were not resetting anything here, is this change on purpose? 
> (if it is, it's worth adding it to the commit msg since it's changing 
> behavior).
> 
> >               I915_WRITE_CTL(engine, tmp);
> >               return ENGINE_WAIT_KICK;
> >       }
> > @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> >               default:
> >                       return ENGINE_DEAD;
> >               case 1:
> > -                     i915_handle_error(dev_priv, 0,
> > -                                       "Kicking stuck semaphore on %s",
> > +                     i915_handle_error(dev_priv, ALL_ENGINES, 0,
> Same here,

Both are functionally no-op changes, as they are only for !per-engine
platforms (unless someone manages to send just the wrong type of garbage
to the GPU). I just thought it interesting to document that wait-event
needs a local kick and the wait-sema needs to kick the other engines.
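
In code form, the distinction being documented (both hunks are quoted
above; arguments elided):

	/* wait-event: only the waiting engine needs the kick */
	i915_handle_error(dev_priv, BIT(engine->id), 0, ...);

	/* wait-semaphore: the signaller may be any engine, so kick all */
	i915_handle_error(dev_priv, ALL_ENGINES, 0, ...);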
-Chris

* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:44   ` Chris Wilson
@ 2018-03-20  0:56     ` Michel Thierry
  2018-03-20  1:09       ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Michel Thierry @ 2018-03-20  0:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala

On 3/19/2018 5:44 PM, Chris Wilson wrote:
> Quoting Michel Thierry (2018-03-20 00:39:35)
>> On 3/19/2018 5:18 PM, Chris Wilson wrote:
>>> Not all callers want the GPU error to be handled in the same way, so
>>> expose a control parameter. In the first instance, some callers do not
>>> want the heavyweight error capture, so add a bit to request the state
>>> to be captured and saved.
>>>
>>> v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
>>> reason for the reset in the dev_notice(), superseding the earlier option
>>> to not print that notice.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Jeff McGee <jeff.mcgee@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> Cc: Michel Thierry <michel.thierry@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_debugfs.c              |  4 +--
>>>    drivers/gpu/drm/i915/i915_drv.c                  | 17 +++++------
>>>    drivers/gpu/drm/i915/i915_drv.h                  | 10 +++---
>>>    drivers/gpu/drm/i915/i915_irq.c                  | 39 +++++++++++++-----------
>>>    drivers/gpu/drm/i915/intel_hangcheck.c           | 13 ++++----
>>>    drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
>>>    6 files changed, 48 insertions(+), 48 deletions(-)
>>>
>> ...
>>> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
>>> index 42e45ae87393..fd0ffb8328d0 100644
>>> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
>>> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
>>> @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>>>         */
>>>        tmp = I915_READ_CTL(engine);
>>>        if (tmp & RING_WAIT) {
>>> -             i915_handle_error(dev_priv, 0,
>>> -                               "Kicking stuck wait on %s",
>>> -                               engine->name);
>>> +             i915_handle_error(dev_priv, BIT(engine->id), 0,
>>> +                               "stuck wait on %s", engine->name);
>> Before, we were not resetting anything here; is this change on purpose?
>> (if it is, it's worth adding it to the commit msg since it's changing
>> behavior).
>>
>>>                I915_WRITE_CTL(engine, tmp);
>>>                return ENGINE_WAIT_KICK;
>>>        }
>>> @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>>>                default:
>>>                        return ENGINE_DEAD;
>>>                case 1:
>>> -                     i915_handle_error(dev_priv, 0,
>>> -                                       "Kicking stuck semaphore on %s",
>>> +                     i915_handle_error(dev_priv, ALL_ENGINES, 0,
>> Same here,
> 
> Both are functionally no-op changes, as they are only for !per-engine
> platforms (unless someone manages to send just the wrong type of garbage
> to the GPU). I just thought it interesting to document that wait-event
> needs a local kick and the wait-sema needs to kick the other engines.
i915_handle_error has this before full reset:

	if (!engine_mask)
		goto out;

No reset at all was happening before.
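
For reference, the surrounding flow is roughly (a sketch, not the exact
code):

	/* per-engine resets, where supported, clear their bits first */
	...
	/* an empty mask here means nothing was asked (or left) to reset */
	if (!engine_mask)
		goto out;
	/* only a non-empty mask falls through to the full GPU reset */
	...

so the old callers passing 0 always took the early-out.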

* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:56     ` Michel Thierry
@ 2018-03-20  1:09       ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-03-20  1:09 UTC (permalink / raw)
  To: Michel Thierry, intel-gfx; +Cc: Mika Kuoppala

Quoting Michel Thierry (2018-03-20 00:56:04)
> On 3/19/2018 5:44 PM, Chris Wilson wrote:
> > Quoting Michel Thierry (2018-03-20 00:39:35)
> >> On 3/19/2018 5:18 PM, Chris Wilson wrote:
> >>> [...]
> >> ...
> >>> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> >>> index 42e45ae87393..fd0ffb8328d0 100644
> >>> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> >>> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> >>> @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> >>>         */
> >>>        tmp = I915_READ_CTL(engine);
> >>>        if (tmp & RING_WAIT) {
> >>> -             i915_handle_error(dev_priv, 0,
> >>> -                               "Kicking stuck wait on %s",
> >>> -                               engine->name);
> >>> +             i915_handle_error(dev_priv, BIT(engine->id), 0,
> >>> +                               "stuck wait on %s", engine->name);
> >> Before, we were not resetting anything here; is this change on purpose?
> >> (if it is, it's worth adding it to the commit msg since it's changing
> >> behavior).
> >>
> >>>                I915_WRITE_CTL(engine, tmp);
> >>>                return ENGINE_WAIT_KICK;
> >>>        }
> >>> @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> >>>                default:
> >>>                        return ENGINE_DEAD;
> >>>                case 1:
> >>> -                     i915_handle_error(dev_priv, 0,
> >>> -                                       "Kicking stuck semaphore on %s",
> >>> +                     i915_handle_error(dev_priv, ALL_ENGINES, 0,
> >> Same here,
> > 
> > Both are functionally no-op changes, as they are only for !per-engine
> > platforms (unless someone manages to send just the wrong type of garbage
> > to the GPU). I just thought it interesting to document that wait-event
> > needs a local kick and the wait-sema needs to kick the other engines.
> i915_handle_error has this before full reset:
> 
>         if (!engine_mask)
>                 goto out;
> 
> No reset at all was happening before.

We bugged out a while back then ;)
-Chris

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
                   ` (4 preceding siblings ...)
  2018-03-20  0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
@ 2018-03-20  1:24 ` Patchwork
  2018-03-20  1:25 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20  1:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL   : https://patchwork.freedesktop.org/series/40240/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
b279af513a73 drm/i915: Add control flags to i915_handle_error()
-:111: CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#111: FILE: drivers/gpu/drm/i915/i915_drv.h:2703:
+extern void i915_reset(struct drm_i915_private *i915, const char *msg);

-:112: CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#112: FILE: drivers/gpu/drm/i915/i915_drv.h:2704:
+extern int i915_reset_engine(struct intel_engine_cs *engine, const char *msg);

total: 0 errors, 0 warnings, 2 checks, 273 lines checked
76088d3cc0fb drm/i915/execlists: Refactor out complete_preempt_context()
fe83f1c114bf drm/i915: Move engine reset prepare/finish to backends
597e8d4ae7a5 drm/i915: Split execlists/guc reset prepartions
2950e96e849d drm/i915/execlists: Flush pending preemption events during reset
-:103: WARNING:LONG_LINE: line over 100 characters
#103: FILE: drivers/gpu/drm/i915/intel_lrc.c:879:
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",

-:104: WARNING:LONG_LINE: line over 100 characters
#104: FILE: drivers/gpu/drm/i915/intel_lrc.c:880:
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");

-:159: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#159: FILE: drivers/gpu/drm/i915/intel_lrc.c:911:
+			  status, buf[2*head + 1],
 			               ^

-:188: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#188: FILE: drivers/gpu/drm/i915/intel_lrc.c:929:
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
 		         ^

total: 0 errors, 2 warnings, 2 checks, 396 lines checked


* ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
                   ` (5 preceding siblings ...)
  2018-03-20  1:24 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] " Patchwork
@ 2018-03-20  1:25 ` Patchwork
  2018-03-20  1:41 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-03-20  6:45 ` ✗ Fi.CI.IGT: failure " Patchwork
  8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20  1:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL   : https://patchwork.freedesktop.org/series/40240/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Add control flags to i915_handle_error()
+drivers/gpu/drm/i915/i915_request.c:1232:35: warning: Using plain integer as NULL pointer

Commit: drm/i915/execlists: Refactor out complete_preempt_context()
Okay!

Commit: drm/i915: Move engine reset prepare/finish to backends
Okay!

Commit: drm/i915: Split execlists/guc reset prepartions
Okay!

Commit: drm/i915/execlists: Flush pending preemption events during reset
Okay!


* ✓ Fi.CI.BAT: success for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
                   ` (6 preceding siblings ...)
  2018-03-20  1:25 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-03-20  1:41 ` Patchwork
  2018-03-20  6:45 ` ✗ Fi.CI.IGT: failure " Patchwork
  8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20  1:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL   : https://patchwork.freedesktop.org/series/40240/
State : success

== Summary ==

Series 40240v1 series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
https://patchwork.freedesktop.org/api/1.0/series/40240/revisions/1/mbox/

---- Known issues:

Test kms_chamelium:
        Subgroup dp-crc-fast:
                pass       -> DMESG-FAIL (fi-kbl-7500u) fdo#103841
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713

fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:432s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:445s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:378s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:533s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:296s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:516s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:516s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:504s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:414s
fi-cfl-s2        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:585s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:512s
fi-cnl-drrs      total:285  pass:254  dwarn:3   dfail:0   fail:0   skip:28  time:523s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:426s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:317s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:536s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:400s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:422s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:473s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:428s
fi-kbl-7500u     total:285  pass:259  dwarn:1   dfail:1   fail:0   skip:24  time:478s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:467s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:514s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:655s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:439s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:530s
fi-skl-6700hq    total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:542s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:504s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:487s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:427s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:447s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:568s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:397s

141def2a45f4a3ad7c7e9144cd26e97bb1298397 drm-tip: 2018y-03m-19d-23h-48m-43s UTC integration manifest
2950e96e849d drm/i915/execlists: Flush pending preemption events during reset
597e8d4ae7a5 drm/i915: Split execlists/guc reset prepartions
fe83f1c114bf drm/i915: Move engine reset prepare/finish to backends
76088d3cc0fb drm/i915/execlists: Refactor out complete_preempt_context()
b279af513a73 drm/i915: Add control flags to i915_handle_error()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8406/issues.html

* ✗ Fi.CI.IGT: failure for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
  2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
                   ` (7 preceding siblings ...)
  2018-03-20  1:41 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-03-20  6:45 ` Patchwork
  8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20  6:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL   : https://patchwork.freedesktop.org/series/40240/
State : failure

== Summary ==

---- Possible new issues:

Test drm_mm:
        Subgroup sanitycheck:
                pass       -> INCOMPLETE (shard-apl)
Test drv_selftest:
        Subgroup live_guc:
                pass       -> SKIP       (shard-apl)
        Subgroup live_hangcheck:
                pass       -> DMESG-FAIL (shard-apl)

---- Known issues:

Test kms_cursor_legacy:
        Subgroup 2x-long-flip-vs-cursor-legacy:
                incomplete -> PASS       (shard-hsw) fdo#104873
Test kms_flip:
        Subgroup dpms-vs-vblank-race:
                fail       -> PASS       (shard-hsw) fdo#103060
        Subgroup flip-vs-absolute-wf_vblank:
                fail       -> PASS       (shard-hsw) fdo#100368
        Subgroup flip-vs-expired-vblank-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#102887
Test kms_flip_tiling:
        Subgroup flip-x-tiled:
                fail       -> PASS       (shard-apl) fdo#103822
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                skip       -> PASS       (shard-snb) fdo#103375 +1
Test kms_plane:
        Subgroup plane-position-hole-pipe-c-planes:
                fail       -> PASS       (shard-apl) fdo#103166

fdo#104873 https://bugs.freedesktop.org/show_bug.cgi?id=104873
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166

shard-apl        total:3462 pass:1795 dwarn:1   dfail:1   fail:7   skip:1656 time:12458s
shard-hsw        total:3478 pass:1767 dwarn:1   dfail:0   fail:2   skip:1707 time:11739s
shard-snb        total:3478 pass:1357 dwarn:1   dfail:0   fail:2   skip:2118 time:7153s
Blacklisted hosts:
shard-kbl        total:3478 pass:1937 dwarn:1   dfail:1   fail:9   skip:1530 time:9861s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8406/shards.html

* Re: [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
  2018-03-20  0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
@ 2018-03-21  0:17   ` Jeff McGee
  2018-03-22 15:14     ` Jeff McGee
  0 siblings, 1 reply; 19+ messages in thread
From: Jeff McGee @ 2018-03-21  0:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote:
> Catch up with the inflight CSB events after disabling the tasklet,
> before deciding which request was truly guilty of hanging the GPU.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> CC: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
>  1 file changed, 197 insertions(+), 158 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e5a3ffbc273a..beb81f13a3cc 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  	local_irq_restore(flags);
>  }
>  
> -/*
> - * Check the unread Context Status Buffers and manage the submission of new
> - * contexts to the ELSP accordingly.
> - */
> -static void execlists_submission_tasklet(unsigned long data)
> +static void process_csb(struct intel_engine_cs *engine)
>  {
> -	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
>  	struct execlist_port * const port = execlists->port;
> -	struct drm_i915_private *dev_priv = engine->i915;
> +	struct drm_i915_private *i915 = engine->i915;
> +	unsigned int head, tail;
> +	const u32 *buf;
>  	bool fw = false;
>  
> -	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
> -	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
> -	 * not be relinquished until the device is idle (see
> -	 * i915_gem_idle_work_handler()). As a precaution, we make sure
> -	 * that all ELSP are drained i.e. we have processed the CSB,
> -	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
> -	 */
> -	GEM_BUG_ON(!dev_priv->gt.awake);
> +	if (unlikely(execlists->csb_use_mmio)) {
> +		buf = (u32 * __force)
> +			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +		execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> +	} else {
> +		/* The HWSP contains a (cacheable) mirror of the CSB */
> +		buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> +	}
>  
> -	/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
> -	 * imposing the cost of a locked atomic transaction when submitting a
> -	 * new request (outside of the context-switch interrupt).
> +	/*
> +	 * The write will be ordered by the uncached read (itself
> +	 * a memory barrier), so we do not need another in the form
> +	 * of a locked instruction. The race between the interrupt
> +	 * handler and the split test/clear is harmless as we order
> +	 * our clear before the CSB read. If the interrupt arrived
> +	 * first between the test and the clear, we read the updated
> +	 * CSB and clear the bit. If the interrupt arrives as we read
> +	 * the CSB or later (i.e. after we had cleared the bit) the bit
> +	 * is set and we do a new loop.
>  	 */
> -	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> -		/* The HWSP contains a (cacheable) mirror of the CSB */
> -		const u32 *buf =
> -			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> -		unsigned int head, tail;
> -
> -		if (unlikely(execlists->csb_use_mmio)) {
> -			buf = (u32 * __force)
> -				(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> -			execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> -		}
> +	__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> +	if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> +		intel_uncore_forcewake_get(i915, execlists->fw_domains);
> +		fw = true;
> +
> +		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> +		tail = GEN8_CSB_WRITE_PTR(head);
> +		head = GEN8_CSB_READ_PTR(head);
> +		execlists->csb_head = head;
> +	} else {
> +		const int write_idx =
> +			intel_hws_csb_write_index(i915) -
> +			I915_HWS_CSB_BUF0_INDEX;
> +
> +		head = execlists->csb_head;
> +		tail = READ_ONCE(buf[write_idx]);
> +	}
> +	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> +		  engine->name,
> +		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> +		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> +
> +	while (head != tail) {
> +		struct i915_request *rq;
> +		unsigned int status;
> +		unsigned int count;
> +
> +		if (++head == GEN8_CSB_ENTRIES)
> +			head = 0;
>  
> -		/* The write will be ordered by the uncached read (itself
> -		 * a memory barrier), so we do not need another in the form
> -		 * of a locked instruction. The race between the interrupt
> -		 * handler and the split test/clear is harmless as we order
> -		 * our clear before the CSB read. If the interrupt arrived
> -		 * first between the test and the clear, we read the updated
> -		 * CSB and clear the bit. If the interrupt arrives as we read
> -		 * the CSB or later (i.e. after we had cleared the bit) the bit
> -		 * is set and we do a new loop.
> +		/*
> +		 * We are flying near dragons again.
> +		 *
> +		 * We hold a reference to the request in execlist_port[]
> +		 * but no more than that. We are operating in softirq
> +		 * context and so cannot hold any mutex or sleep. That
> +		 * prevents us stopping the requests we are processing
> +		 * in port[] from being retired simultaneously (the
> +		 * breadcrumb will be complete before we see the
> +		 * context-switch). As we only hold the reference to the
> +		 * request, any pointer chasing underneath the request
> +		 * is subject to a potential use-after-free. Thus we
> +		 * store all of the bookkeeping within port[] as
> +		 * required, and avoid using unguarded pointers beneath
> +		 * request itself. The same applies to the atomic
> +		 * status notifier.
>  		 */
> -		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -		if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> -			if (!fw) {
> -				intel_uncore_forcewake_get(dev_priv,
> -							   execlists->fw_domains);
> -				fw = true;
> -			}
>  
> -			head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -			tail = GEN8_CSB_WRITE_PTR(head);
> -			head = GEN8_CSB_READ_PTR(head);
> -			execlists->csb_head = head;
> -		} else {
> -			const int write_idx =
> -				intel_hws_csb_write_index(dev_priv) -
> -				I915_HWS_CSB_BUF0_INDEX;
> +		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> +		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> +			  engine->name, head,
> +			  status, buf[2*head + 1],
> +			  execlists->active);
> +
> +		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +			      GEN8_CTX_STATUS_PREEMPTED))
> +			execlists_set_active(execlists,
> +					     EXECLISTS_ACTIVE_HWACK);
> +		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> +			execlists_clear_active(execlists,
> +					       EXECLISTS_ACTIVE_HWACK);
> +
> +		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> +			continue;
>  
> -			head = execlists->csb_head;
> -			tail = READ_ONCE(buf[write_idx]);
> -		}
> -		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> -			  engine->name,
> -			  head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> -			  tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> +		/* We should never get a COMPLETED | IDLE_ACTIVE! */
> +		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>  
> -		while (head != tail) {
> -			struct i915_request *rq;
> -			unsigned int status;
> -			unsigned int count;
> +		if (status & GEN8_CTX_STATUS_COMPLETE &&
> +		    buf[2*head + 1] == execlists->preempt_complete_status) {
> +			GEM_TRACE("%s preempt-idle\n", engine->name);
> +			complete_preempt_context(execlists);
> +			continue;
> +		}
>  
> -			if (++head == GEN8_CSB_ENTRIES)
> -				head = 0;
> +		if (status & GEN8_CTX_STATUS_PREEMPTED &&
> +		    execlists_is_active(execlists,
> +					EXECLISTS_ACTIVE_PREEMPT))
> +			continue;
>  
> -			/* We are flying near dragons again.
> -			 *
> -			 * We hold a reference to the request in execlist_port[]
> -			 * but no more than that. We are operating in softirq
> -			 * context and so cannot hold any mutex or sleep. That
> -			 * prevents us stopping the requests we are processing
> -			 * in port[] from being retired simultaneously (the
> -			 * breadcrumb will be complete before we see the
> -			 * context-switch). As we only hold the reference to the
> -			 * request, any pointer chasing underneath the request
> -			 * is subject to a potential use-after-free. Thus we
> -			 * store all of the bookkeeping within port[] as
> -			 * required, and avoid using unguarded pointers beneath
> -			 * request itself. The same applies to the atomic
> -			 * status notifier.
> -			 */
> +		GEM_BUG_ON(!execlists_is_active(execlists,
> +						EXECLISTS_ACTIVE_USER));
>  
> -			status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> -			GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> -				  engine->name, head,
> -				  status, buf[2*head + 1],
> -				  execlists->active);
> -
> -			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -				      GEN8_CTX_STATUS_PREEMPTED))
> -				execlists_set_active(execlists,
> -						     EXECLISTS_ACTIVE_HWACK);
> -			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> -				execlists_clear_active(execlists,
> -						       EXECLISTS_ACTIVE_HWACK);
> -
> -			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> -				continue;
> +		rq = port_unpack(port, &count);
> +		GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> +			  engine->name,
> +			  port->context_id, count,
> +			  rq ? rq->global_seqno : 0,
> +			  rq ? rq_prio(rq) : 0);
> +
> +		/* Check the context/desc id for this event matches */
> +		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> +
> +		GEM_BUG_ON(count == 0);
> +		if (--count == 0) {
> +			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> +			GEM_BUG_ON(port_isset(&port[1]) &&
> +				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> +			GEM_BUG_ON(!i915_request_completed(rq));
> +			execlists_context_schedule_out(rq);
> +			trace_i915_request_out(rq);
> +			i915_request_put(rq);
> +
> +			GEM_TRACE("%s completed ctx=%d\n",
> +				  engine->name, port->context_id);
> +
> +			execlists_port_complete(execlists, port);
> +		} else {
> +			port_set(port, port_pack(rq, count));
> +		}
>  
> -			/* We should never get a COMPLETED | IDLE_ACTIVE! */
> -			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> +		/* After the final element, the hw should be idle */
> +		GEM_BUG_ON(port_count(port) == 0 &&
> +			   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> +		if (port_count(port) == 0)
> +			execlists_clear_active(execlists,
> +					       EXECLISTS_ACTIVE_USER);
> +	}
>  
> -			if (status & GEN8_CTX_STATUS_COMPLETE &&
> -			    buf[2*head + 1] == execlists->preempt_complete_status) {
> -				GEM_TRACE("%s preempt-idle\n", engine->name);
> -				complete_preempt_context(execlists);
> -				continue;
> -			}
> +	if (head != execlists->csb_head) {
> +		execlists->csb_head = head;
> +		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> +		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> +	}
>  
> -			if (status & GEN8_CTX_STATUS_PREEMPTED &&
> -			    execlists_is_active(execlists,
> -						EXECLISTS_ACTIVE_PREEMPT))
> -				continue;
> +	if (unlikely(fw))
> +		intel_uncore_forcewake_put(i915, execlists->fw_domains);
> +}
>  
> -			GEM_BUG_ON(!execlists_is_active(execlists,
> -							EXECLISTS_ACTIVE_USER));
> -
> -			rq = port_unpack(port, &count);
> -			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> -				  engine->name,
> -				  port->context_id, count,
> -				  rq ? rq->global_seqno : 0,
> -				  rq ? rq_prio(rq) : 0);
> -
> -			/* Check the context/desc id for this event matches */
> -			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> -
> -			GEM_BUG_ON(count == 0);
> -			if (--count == 0) {
> -				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> -				GEM_BUG_ON(port_isset(&port[1]) &&
> -					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> -				GEM_BUG_ON(!i915_request_completed(rq));
> -				execlists_context_schedule_out(rq);
> -				trace_i915_request_out(rq);
> -				i915_request_put(rq);
> -
> -				GEM_TRACE("%s completed ctx=%d\n",
> -					  engine->name, port->context_id);
> -
> -				execlists_port_complete(execlists, port);
> -			} else {
> -				port_set(port, port_pack(rq, count));
> -			}
> +/*
> + * Check the unread Context Status Buffers and manage the submission of new
> + * contexts to the ELSP accordingly.
> + */
> +static void execlists_submission_tasklet(unsigned long data)
> +{
> +	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>  
> -			/* After the final element, the hw should be idle */
> -			GEM_BUG_ON(port_count(port) == 0 &&
> -				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> -			if (port_count(port) == 0)
> -				execlists_clear_active(execlists,
> -						       EXECLISTS_ACTIVE_USER);
> -		}
> +	/*
> +	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
> +	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
> +	 * not be relinquished until the device is idle (see
> +	 * i915_gem_idle_work_handler()). As a precaution, we make sure
> +	 * that all ELSP are drained i.e. we have processed the CSB,
> +	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
> +	 */
> +	GEM_BUG_ON(!engine->i915->gt.awake);
>  
> -		if (head != execlists->csb_head) {
> -			execlists->csb_head = head;
> -			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> -			       dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -		}
> -	}
> +	/*
> +	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> +	 * imposing the cost of a locked atomic transaction when submitting a
> +	 * new request (outside of the context-switch interrupt).
> +	 */
> +	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
This was a 'while' before the refactoring. Shouldn't it still be, in order to
catch new irq_posted during processing?
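
i.e. keep it as (sketch):

	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
		process_csb(engine);

so that a CSB event posted while we are inside process_csb() is drained
before the tasklet returns, instead of waiting for a re-schedule.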

BTW, I have revised and rebased the force preemption patches on drm-tip with
this and the other related patches you have proposed. I just started running
my IGT test that covers the innocent context on port[1] scenario that we
discussed on IRC. Getting a sporadic failure but need more time to root cause.
Will update soon.

> +		process_csb(engine);
>  
> -	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> +	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
>  		execlists_dequeue(engine);
> -
> -	if (fw)
> -		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
>  }
>  
>  static void queue_request(struct intel_engine_cs *engine,
> @@ -1667,6 +1673,7 @@ static struct i915_request *
>  execlists_reset_prepare(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	struct i915_request *request, *active;
>  
>  	/*
>  	 * Prevent request submission to the hardware until we have
> @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>  		tasklet_kill(&execlists->tasklet);
>  	tasklet_disable(&execlists->tasklet);
>  
> -	return i915_gem_find_active_request(engine);
> +	/*
> +	 * We want to flush the pending context switches; having disabled
> +	 * the tasklet above, we can assume exclusive access to the execlists.
> +	 * This allows us to catch up with an inflight preemption event,
> +	 * and avoid blaming an innocent request if the stall was due to the
> +	 * preemption itself.
> +	 */
> +	process_csb(engine);
> +
> +	/*
> +	 * The last active request can then be no later than the last request
> +	 * now in ELSP[0]. So search backwards from there, so that if the GPU
> +	 * has advanced beyond the last CSB update, it will be pardoned.
> +	 */
> +	active = NULL;
> +	request = port_request(execlists->port);
> +	if (request) {
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&engine->timeline->lock, flags);
> +		list_for_each_entry_from_reverse(request,
> +						 &engine->timeline->requests,
> +						 link) {
> +			if (__i915_request_completed(request,
> +						     request->global_seqno))
> +				break;
> +
> +			active = request;
> +		}
> +		spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +	}
> +
> +	return active;
>  }
>  
>  static void reset_irq(struct intel_engine_cs *engine)
> -- 
> 2.16.2
> 

* Re: [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
  2018-03-21  0:17   ` Jeff McGee
@ 2018-03-22 15:14     ` Jeff McGee
  2018-03-26 11:28       ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Mar 20, 2018 at 05:17:34PM -0700, Jeff McGee wrote:
> On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote:
> > Catch up with the inflight CSB events after disabling the tasklet,
> > before deciding which request was truly guilty of hanging the GPU.
> > [...]
> > +	/*
> > +	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> > +	 * imposing the cost of a locked atomic transaction when submitting a
> > +	 * new request (outside of the context-switch interrupt).
> > +	 */
> > +	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> This was a 'while' before the refactoring. Shouldn't it still be, in order to
> catch new irq_posted during processing?
> 
> BTW, I have revised and rebased the force preemption patches on drm-tip with
> this and the other related patches you have proposed. I just started running
> my IGT test that covers the innocent context on port[1] scenario that we
> discussed on IRC. Getting a sporadic failure but need more time to root cause.
> Will update soon.
> 
> > +		process_csb(engine);
> >  
> > -	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> > +	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> >  		execlists_dequeue(engine);
> > -
> > -	if (fw)
> > -		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> >  }
> >  
> >  static void queue_request(struct intel_engine_cs *engine,
> > @@ -1667,6 +1673,7 @@ static struct i915_request *
> >  execlists_reset_prepare(struct intel_engine_cs *engine)
> >  {
> >  	struct intel_engine_execlists * const execlists = &engine->execlists;
> > +	struct i915_request *request, *active;
> >  
> >  	/*
> >  	 * Prevent request submission to the hardware until we have
> > @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> >  		tasklet_kill(&execlists->tasklet);
> >  	tasklet_disable(&execlists->tasklet);
> >  
> > -	return i915_gem_find_active_request(engine);
> > +	/*
> > +	 * We want to flush the pending context switches, having disabled
> > +	 * We want to flush the pending context switches; having disabled
> > +	 * the tasklet above, we can assume exclusive access to the execlists.
> > +	 * This allows us to catch up with an inflight preemption event,
> > +	 * preemption itself.
> > +	 */
> > +	process_csb(engine);
> > +
> > +	/*
> > +	 * The last active request can then be no later than the last request
> > +	 * now in ELSP[0]. So search backwards from there, so that if the GPU
> > +	 * has advanced beyond the last CSB update, it will be pardoned.
> > +	 */
> > +	active = NULL;
> > +	request = port_request(execlists->port);
> > +	if (request) {
> > +		unsigned long flags;
> > +
> > +		spin_lock_irqsave(&engine->timeline->lock, flags);
> > +		list_for_each_entry_from_reverse(request,
> > +						 &engine->timeline->requests,
> > +						 link) {
> > +			if (__i915_request_completed(request,
> > +						     request->global_seqno))
> > +				break;
> > +
> > +			active = request;
> > +		}
> > +		spin_unlock_irqrestore(&engine->timeline->lock, flags);
> > +	}
> > +
> > +	return active;

Now we can see an abort of the reset if process_csb() processes a completed
preemption. So we need to kick the tasklet to get a dequeue of the waiting
request to happen. Currently we only kick if needed in gen8_init_common_ring,
which is skipped if we abort the reset.
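
Something along these lines in the execlists finish hook would cover it
(just a sketch; I'm assuming the hook is named execlists_reset_finish per
the new engine->reset vfuncs, and guessing the queue check):

	static void execlists_reset_finish(struct intel_engine_cs *engine)
	{
		struct intel_engine_execlists * const execlists = &engine->execlists;

		tasklet_enable(&execlists->tasklet);

		/*
		 * An aborted reset can leave a completed preemption
		 * processed by process_csb() but nothing dequeued, so
		 * kick the tasklet to submit the waiting request.
		 */
		if (!RB_EMPTY_ROOT(&execlists->queue))
			tasklet_hi_schedule(&execlists->tasklet);
	}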

Otherwise this is working well in my force preemption tests.
-Jeff
> >  }
> >  
> >  static void reset_irq(struct intel_engine_cs *engine)
> > -- 
> > 2.16.2
> > 

* Re: [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context()
  2018-03-20  0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
@ 2018-03-22 15:26   ` Jeff McGee
  0 siblings, 0 replies; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Mar 20, 2018 at 12:18:45AM +0000, Chris Wilson wrote:
> As a complement to inject_preempt_context(), follow up with the function
> to handle its completion. This will be useful should we wish to extend
> the duties of the preempt-context for execlists.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 22 ++++++++++++----------
>  1 file changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 53f1c009ed7b..0bfaeb56b8c7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -531,8 +531,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>  	if (execlists->ctrl_reg)
>  		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>  
> -	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
> -	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> +	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
> +	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> +}
> +
> +static void complete_preempt_context(struct intel_engine_execlists *execlists)
> +{
> +	execlists_cancel_port_requests(execlists);
> +	execlists_unwind_incomplete_requests(execlists);
> +
> +	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
> +	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>  }
>  
>  static void execlists_dequeue(struct intel_engine_cs *engine)
> @@ -939,14 +948,7 @@ static void execlists_submission_tasklet(unsigned long data)
>  			if (status & GEN8_CTX_STATUS_COMPLETE &&
>  			    buf[2*head + 1] == execlists->preempt_complete_status) {
>  				GEM_TRACE("%s preempt-idle\n", engine->name);
> -
> -				execlists_cancel_port_requests(execlists);
> -				execlists_unwind_incomplete_requests(execlists);
> -
> -				GEM_BUG_ON(!execlists_is_active(execlists,
> -								EXECLISTS_ACTIVE_PREEMPT));
> -				execlists_clear_active(execlists,
> -						       EXECLISTS_ACTIVE_PREEMPT);
> +				complete_preempt_context(execlists);
>  				continue;
>  			}
>  
> -- 
> 2.16.2
> 

Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>

* Re: [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends
  2018-03-20  0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
@ 2018-03-22 15:28   ` Jeff McGee
  0 siblings, 0 replies; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Mar 20, 2018 at 12:18:46AM +0000, Chris Wilson wrote:
> In preparation for more carefully handling incomplete preemption during
> reset by execlists, we move the existing code wholesale to the backends
> under a couple of new reset vfuncs.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> CC: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c         | 42 ++++----------------------
>  drivers/gpu/drm/i915/intel_lrc.c        | 52 +++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  9 ++++--
>  4 files changed, 78 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 802df8e1a544..38f7160d99c9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2917,7 +2917,7 @@ static bool engine_stalled(struct intel_engine_cs *engine)
>  struct i915_request *
>  i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
>  {
> -	struct i915_request *request = NULL;
> +	struct i915_request *request;
>  
>  	/*
>  	 * During the reset sequence, we must prevent the engine from
> @@ -2940,40 +2940,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
>  	 */
>  	kthread_park(engine->breadcrumbs.signaler);
>  
> -	/*
> -	 * Prevent request submission to the hardware until we have
> -	 * completed the reset in i915_gem_reset_finish(). If a request
> -	 * is completed by one engine, it may then queue a request
> -	 * to a second via its execlists->tasklet *just* as we are
> -	 * calling engine->init_hw() and also writing the ELSP.
> -	 * Turning off the execlists->tasklet until the reset is over
> -	 * prevents the race.
> -	 *
> -	 * Note that this needs to be a single atomic operation on the
> -	 * tasklet (flush existing tasks, prevent new tasks) to prevent
> -	 * a race between reset and set-wedged. It is not, so we do the best
> -	 * we can atm and make sure we don't lock the machine up in the more
> -	 * common case of recursively being called from set-wedged from inside
> -	 * i915_reset.
> -	 */
> -	if (!atomic_read(&engine->execlists.tasklet.count))
> -		tasklet_kill(&engine->execlists.tasklet);
> -	tasklet_disable(&engine->execlists.tasklet);
> -
> -	/*
> -	 * We're using worker to queue preemption requests from the tasklet in
> -	 * GuC submission mode.
> -	 * Even though tasklet was disabled, we may still have a worker queued.
> -	 * Let's make sure that all workers scheduled before disabling the
> -	 * tasklet are completed before continuing with the reset.
> -	 */
> -	if (engine->i915->guc.preempt_wq)
> -		flush_workqueue(engine->i915->guc.preempt_wq);
> -
> -	if (engine->irq_seqno_barrier)
> -		engine->irq_seqno_barrier(engine);
> -
> -	request = i915_gem_find_active_request(engine);
> +	request = engine->reset.prepare(engine);
>  	if (request && request->fence.error == -EIO)
>  		request = ERR_PTR(-EIO); /* Previous reset failed! */
>  
> @@ -3120,7 +3087,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
>  	}
>  
>  	/* Setup the CS to resume from the breadcrumb of the hung request */
> -	engine->reset_hw(engine, request);
> +	engine->reset.reset(engine, request);
>  }
>  
>  void i915_gem_reset(struct drm_i915_private *dev_priv)
> @@ -3172,7 +3139,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
>  
>  void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
>  {
> -	tasklet_enable(&engine->execlists.tasklet);
> +	engine->reset.finish(engine);
> +
>  	kthread_unpark(engine->breadcrumbs.signaler);
>  
>  	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 0bfaeb56b8c7..f662a9524233 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1663,6 +1663,44 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
>  	return init_workarounds_ring(engine);
>  }
>  
> +static struct i915_request *
> +execlists_reset_prepare(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +
> +	/*
> +	 * Prevent request submission to the hardware until we have
> +	 * completed the reset in i915_gem_reset_finish(). If a request
> +	 * is completed by one engine, it may then queue a request
> +	 * to a second via its execlists->tasklet *just* as we are
> +	 * calling engine->init_hw() and also writing the ELSP.
> +	 * Turning off the execlists->tasklet until the reset is over
> +	 * prevents the race.
> +	 *
> +	 * Note that this needs to be a single atomic operation on the
> +	 * tasklet (flush existing tasks, prevent new tasks) to prevent
> +	 * a race between reset and set-wedged. It is not, so we do the best
> +	 * we can atm and make sure we don't lock the machine up in the more
> +	 * common case of recursively being called from set-wedged from inside
> +	 * i915_reset.
> +	 */
> +	if (!atomic_read(&execlists->tasklet.count))
> +		tasklet_kill(&execlists->tasklet);
> +	tasklet_disable(&execlists->tasklet);
> +
> +	/*
> +	 * We're using worker to queue preemption requests from the tasklet in
> +	 * GuC submission mode.
> +	 * Even though tasklet was disabled, we may still have a worker queued.
> +	 * Let's make sure that all workers scheduled before disabling the
> +	 * tasklet are completed before continuing with the reset.
> +	 */
> +	if (engine->i915->guc.preempt_wq)
> +		flush_workqueue(engine->i915->guc.preempt_wq);
> +
> +	return i915_gem_find_active_request(engine);
> +}
> +
>  static void reset_irq(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> @@ -1692,8 +1730,8 @@ static void reset_irq(struct intel_engine_cs *engine)
>  	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>  }
>  
> -static void reset_common_ring(struct intel_engine_cs *engine,
> -			      struct i915_request *request)
> +static void execlists_reset(struct intel_engine_cs *engine,
> +			    struct i915_request *request)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
>  	struct intel_context *ce;
> @@ -1766,6 +1804,11 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  	unwind_wa_tail(request);
>  }
>  
> +static void execlists_reset_finish(struct intel_engine_cs *engine)
> +{
> +	tasklet_enable(&engine->execlists.tasklet);
> +}
> +
>  static int intel_logical_ring_emit_pdps(struct i915_request *rq)
>  {
>  	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
> @@ -2090,7 +2133,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>  {
>  	/* Default vfuncs which can be overriden by each engine. */
>  	engine->init_hw = gen8_init_common_ring;
> -	engine->reset_hw = reset_common_ring;
> +
> +	engine->reset.prepare = execlists_reset_prepare;
> +	engine->reset.reset = execlists_reset;
> +	engine->reset.finish = execlists_reset_finish;
>  
>  	engine->context_pin = execlists_context_pin;
>  	engine->context_unpin = execlists_context_unpin;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 04d9d9a946a7..eebcc877ef60 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -530,8 +530,16 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  	return ret;
>  }
>  
> -static void reset_ring_common(struct intel_engine_cs *engine,
> -			      struct i915_request *request)
> +static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
> +{
> +	if (engine->irq_seqno_barrier)
> +		engine->irq_seqno_barrier(engine);
> +
> +	return i915_gem_find_active_request(engine);
> +}
> +
> +static void reset_ring(struct intel_engine_cs *engine,
> +		       struct i915_request *request)
>  {
>  	/*
>  	 * RC6 must be prevented until the reset is complete and the engine
> @@ -595,6 +603,10 @@ static void reset_ring_common(struct intel_engine_cs *engine,
>  	}
>  }
>  
> +static void reset_finish(struct intel_engine_cs *engine)
> +{
> +}
> +
>  static int intel_rcs_ctx_init(struct i915_request *rq)
>  {
>  	int ret;
> @@ -1987,7 +1999,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>  	intel_ring_init_semaphores(dev_priv, engine);
>  
>  	engine->init_hw = init_ring_common;
> -	engine->reset_hw = reset_ring_common;
> +	engine->reset.prepare = reset_prepare;
> +	engine->reset.reset = reset_ring;
> +	engine->reset.finish = reset_finish;
>  
>  	engine->context_pin = intel_ring_context_pin;
>  	engine->context_unpin = intel_ring_context_unpin;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 1f50727a5ddb..e2681303ce21 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -418,8 +418,13 @@ struct intel_engine_cs {
>  	void		(*irq_disable)(struct intel_engine_cs *engine);
>  
>  	int		(*init_hw)(struct intel_engine_cs *engine);
> -	void		(*reset_hw)(struct intel_engine_cs *engine,
> -				    struct i915_request *rq);
> +
> +	struct {
> +		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
> +		void (*reset)(struct intel_engine_cs *engine,
> +			      struct i915_request *rq);
> +		void (*finish)(struct intel_engine_cs *engine);
> +	} reset;
>  
>  	void		(*park)(struct intel_engine_cs *engine);
>  	void		(*unpark)(struct intel_engine_cs *engine);
> -- 
> 2.16.2
> 

Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>
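
The heart of the patch is the new per-engine reset vtable. A compilable
standalone sketch of the pattern; only the prepare/reset/finish names come
from the patch, while the toy engine type and callback bodies are invented
to show how the generic reset path can drive a backend without knowing
which one it is:

#include <stdio.h>

struct engine;

/* Mirrors the struct added to intel_engine_cs by the patch. */
struct reset_ops {
        void *(*prepare)(struct engine *e);
        void (*reset)(struct engine *e, void *active_request);
        void (*finish)(struct engine *e);
};

struct engine {
        const char *name;
        struct reset_ops reset;
};

/* Model of the generic path in i915_gem.c: it no longer cares
 * whether the backend is execlists or the legacy ringbuffer. */
static void generic_engine_reset(struct engine *e)
{
        void *rq = e->reset.prepare(e); /* quiesce, find hung request */

        e->reset.reset(e, rq);          /* program CS to resume */
        e->reset.finish(e);             /* re-enable submission */
}

static void *el_prepare(struct engine *e)
{
        printf("%s: prepare\n", e->name);
        return NULL;
}

static void el_reset(struct engine *e, void *rq)
{
        (void)rq;
        printf("%s: reset\n", e->name);
}

static void el_finish(struct engine *e)
{
        printf("%s: finish\n", e->name);
}

int main(void)
{
        struct engine rcs = {
                .name = "rcs0",
                .reset = {
                        .prepare = el_prepare,
                        .reset = el_reset,
                        .finish = el_finish,
                },
        };

        generic_engine_reset(&rcs);
        return 0;
}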

* Re: [PATCH 4/5] drm/i915: Split execlists/guc reset preparations
  2018-03-20  0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset preparations Chris Wilson
@ 2018-03-22 15:28   ` Jeff McGee
  0 siblings, 0 replies; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Mar 20, 2018 at 12:18:47AM +0000, Chris Wilson wrote:
> In the next patch, we will make the execlists reset prepare callback
> take into account preemption by flushing the context-switch handler.
> This is not applicable to the GuC submission backend, so split the two
> into their own backend callbacks.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> CC: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_guc_submission.c | 39 +++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c            | 11 +-------
>  2 files changed, 40 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 207cda062626..779c8d3156e5 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -776,6 +776,44 @@ static void guc_submission_tasklet(unsigned long data)
>  		guc_dequeue(engine);
>  }
>  
> +static struct i915_request *
> +guc_reset_prepare(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +
> +	/*
> +	 * Prevent request submission to the hardware until we have
> +	 * completed the reset in i915_gem_reset_finish(). If a request
> +	 * is completed by one engine, it may then queue a request
> +	 * to a second via its execlists->tasklet *just* as we are
> +	 * calling engine->init_hw() and also writing the ELSP.
> +	 * Turning off the execlists->tasklet until the reset is over
> +	 * prevents the race.
> +	 *
> +	 * Note that this needs to be a single atomic operation on the
> +	 * tasklet (flush existing tasks, prevent new tasks) to prevent
> +	 * a race between reset and set-wedged. It is not, so we do the best
> +	 * we can atm and make sure we don't lock the machine up in the more
> +	 * common case of recursively being called from set-wedged from inside
> +	 * i915_reset.
> +	 */
> +	if (!atomic_read(&execlists->tasklet.count))
> +		tasklet_kill(&execlists->tasklet);
> +	tasklet_disable(&execlists->tasklet);
> +
> +	/*
> +	 * We're using worker to queue preemption requests from the tasklet in
> +	 * GuC submission mode.
> +	 * Even though tasklet was disabled, we may still have a worker queued.
> +	 * Let's make sure that all workers scheduled before disabling the
> +	 * tasklet are completed before continuing with the reset.
> +	 */
> +	if (engine->i915->guc.preempt_wq)
> +		flush_workqueue(engine->i915->guc.preempt_wq);
> +
> +	return i915_gem_find_active_request(engine);
> +}
> +
>  /*
>   * Everything below here is concerned with setup & teardown, and is
>   * therefore not part of the somewhat time-critical batch-submission
> @@ -1235,6 +1273,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>  			&engine->execlists;
>  
>  		execlists->tasklet.func = guc_submission_tasklet;
> +		engine->reset.prepare = guc_reset_prepare;
>  		engine->park = guc_submission_park;
>  		engine->unpark = guc_submission_unpark;
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f662a9524233..e5a3ffbc273a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1688,16 +1688,6 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>  		tasklet_kill(&execlists->tasklet);
>  	tasklet_disable(&execlists->tasklet);
>  
> -	/*
> -	 * We're using worker to queue preemption requests from the tasklet in
> -	 * GuC submission mode.
> -	 * Even though tasklet was disabled, we may still have a worker queued.
> -	 * Let's make sure that all workers scheduled before disabling the
> -	 * tasklet are completed before continuing with the reset.
> -	 */
> -	if (engine->i915->guc.preempt_wq)
> -		flush_workqueue(engine->i915->guc.preempt_wq);
> -
>  	return i915_gem_find_active_request(engine);
>  }
>  
> @@ -2115,6 +2105,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>  	engine->cancel_requests = execlists_cancel_requests;
>  	engine->schedule = execlists_schedule;
>  	engine->execlists.tasklet.func = execlists_submission_tasklet;
> +	engine->reset.prepare = execlists_reset_prepare;
>  
>  	engine->park = NULL;
>  	engine->unpark = NULL;
> -- 
> 2.16.2
> 

Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>
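
The split itself amounts to overriding a single callback when GuC submission
is enabled. A standalone sketch; the execlists/GuC naming and the extra
workqueue flush are taken from the patch, the rest is invented:

#include <stdio.h>

struct engine {
        const char *name;
        void (*reset_prepare)(struct engine *e);
};

/* Default backend: only has to quiesce its tasklet. */
static void execlists_prepare_model(struct engine *e)
{
        printf("%s: disable tasklet\n", e->name);
}

/* GuC backend: also flushes the preemption workqueue, which the
 * execlists path never needs. */
static void guc_prepare_model(struct engine *e)
{
        printf("%s: disable tasklet\n", e->name);
        printf("%s: flush guc preempt workqueue\n", e->name);
}

/* Model of intel_guc_submission_enable(): swap in the GuC vfunc
 * on the engines it takes over. */
static void guc_submission_enable(struct engine *e)
{
        e->reset_prepare = guc_prepare_model;
}

int main(void)
{
        struct engine rcs = { "rcs0", execlists_prepare_model };

        rcs.reset_prepare(&rcs);  /* execlists path */
        guc_submission_enable(&rcs);
        rcs.reset_prepare(&rcs);  /* guc path, with the extra flush */
        return 0;
}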

* Re: [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
  2018-03-22 15:14     ` Jeff McGee
@ 2018-03-26 11:28       ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-03-26 11:28 UTC (permalink / raw)
  To: Jeff McGee; +Cc: intel-gfx

Quoting Jeff McGee (2018-03-22 15:14:01)
> On Tue, Mar 20, 2018 at 05:17:34PM -0700, Jeff McGee wrote:
> > On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote:
> > > Catch up with the in-flight CSB events after disabling the tasklet,
> > > before deciding which request was truly guilty of hanging the GPU.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > > CC: Michel Thierry <michel.thierry@intel.com>
> > > Cc: Jeff McGee <jeff.mcgee@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
> > >  1 file changed, 197 insertions(+), 158 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > > index e5a3ffbc273a..beb81f13a3cc 100644
> > > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > > @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> > >     local_irq_restore(flags);
> > >  }
> > >  
> > > -/*
> > > - * Check the unread Context Status Buffers and manage the submission of new
> > > - * contexts to the ELSP accordingly.
> > > - */
> > > -static void execlists_submission_tasklet(unsigned long data)
> > > +static void process_csb(struct intel_engine_cs *engine)
> > >  {
> > > -   struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> > >     struct intel_engine_execlists * const execlists = &engine->execlists;
> > >     struct execlist_port * const port = execlists->port;
> > > -   struct drm_i915_private *dev_priv = engine->i915;
> > > +   struct drm_i915_private *i915 = engine->i915;
> > > +   unsigned int head, tail;
> > > +   const u32 *buf;
> > >     bool fw = false;
> > >  
> > > -   /* We can skip acquiring intel_runtime_pm_get() here as it was taken
> > > -    * on our behalf by the request (see i915_gem_mark_busy()) and it will
> > > -    * not be relinquished until the device is idle (see
> > > -    * i915_gem_idle_work_handler()). As a precaution, we make sure
> > > -    * that all ELSP are drained i.e. we have processed the CSB,
> > > -    * before allowing ourselves to idle and calling intel_runtime_pm_put().
> > > -    */
> > > -   GEM_BUG_ON(!dev_priv->gt.awake);
> > > +   if (unlikely(execlists->csb_use_mmio)) {
> > > +           buf = (u32 * __force)
> > > +                   (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > > +           execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> > > +   } else {
> > > +           /* The HWSP contains a (cacheable) mirror of the CSB */
> > > +           buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > > +   }
> > >  
> > > -   /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
> > > -    * imposing the cost of a locked atomic transaction when submitting a
> > > -    * new request (outside of the context-switch interrupt).
> > > +   /*
> > > +    * The write will be ordered by the uncached read (itself
> > > +    * a memory barrier), so we do not need another in the form
> > > +    * of a locked instruction. The race between the interrupt
> > > +    * handler and the split test/clear is harmless as we order
> > > +    * our clear before the CSB read. If the interrupt arrived
> > > +    * first between the test and the clear, we read the updated
> > > +    * CSB and clear the bit. If the interrupt arrives as we read
> > > +    * the CSB or later (i.e. after we had cleared the bit) the bit
> > > +    * is set and we do a new loop.
> > >      */
> > > -   while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> > > -           /* The HWSP contains a (cacheable) mirror of the CSB */
> > > -           const u32 *buf =
> > > -                   &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > > -           unsigned int head, tail;
> > > -
> > > -           if (unlikely(execlists->csb_use_mmio)) {
> > > -                   buf = (u32 * __force)
> > > -                           (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > > -                   execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> > > -           }
> > > +   __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > > +   if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> > > +           intel_uncore_forcewake_get(i915, execlists->fw_domains);
> > > +           fw = true;
> > > +
> > > +           head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > +           tail = GEN8_CSB_WRITE_PTR(head);
> > > +           head = GEN8_CSB_READ_PTR(head);
> > > +           execlists->csb_head = head;
> > > +   } else {
> > > +           const int write_idx =
> > > +                   intel_hws_csb_write_index(i915) -
> > > +                   I915_HWS_CSB_BUF0_INDEX;
> > > +
> > > +           head = execlists->csb_head;
> > > +           tail = READ_ONCE(buf[write_idx]);
> > > +   }
> > > +   GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > > +             engine->name,
> > > +             head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > > +             tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > > +
> > > +   while (head != tail) {
> > > +           struct i915_request *rq;
> > > +           unsigned int status;
> > > +           unsigned int count;
> > > +
> > > +           if (++head == GEN8_CSB_ENTRIES)
> > > +                   head = 0;
> > >  
> > > -           /* The write will be ordered by the uncached read (itself
> > > -            * a memory barrier), so we do not need another in the form
> > > -            * of a locked instruction. The race between the interrupt
> > > -            * handler and the split test/clear is harmless as we order
> > > -            * our clear before the CSB read. If the interrupt arrived
> > > -            * first between the test and the clear, we read the updated
> > > -            * CSB and clear the bit. If the interrupt arrives as we read
> > > -            * the CSB or later (i.e. after we had cleared the bit) the bit
> > > -            * is set and we do a new loop.
> > > +           /*
> > > +            * We are flying near dragons again.
> > > +            *
> > > +            * We hold a reference to the request in execlist_port[]
> > > +            * but no more than that. We are operating in softirq
> > > +            * context and so cannot hold any mutex or sleep. That
> > > +            * prevents us stopping the requests we are processing
> > > +            * in port[] from being retired simultaneously (the
> > > +            * breadcrumb will be complete before we see the
> > > +            * context-switch). As we only hold the reference to the
> > > +            * request, any pointer chasing underneath the request
> > > +            * is subject to a potential use-after-free. Thus we
> > > +            * store all of the bookkeeping within port[] as
> > > +            * required, and avoid using unguarded pointers beneath
> > > +            * request itself. The same applies to the atomic
> > > +            * status notifier.
> > >              */
> > > -           __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > > -           if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> > > -                   if (!fw) {
> > > -                           intel_uncore_forcewake_get(dev_priv,
> > > -                                                      execlists->fw_domains);
> > > -                           fw = true;
> > > -                   }
> > >  
> > > -                   head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > -                   tail = GEN8_CSB_WRITE_PTR(head);
> > > -                   head = GEN8_CSB_READ_PTR(head);
> > > -                   execlists->csb_head = head;
> > > -           } else {
> > > -                   const int write_idx =
> > > -                           intel_hws_csb_write_index(dev_priv) -
> > > -                           I915_HWS_CSB_BUF0_INDEX;
> > > +           status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> > > +           GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> > > +                     engine->name, head,
> > > +                     status, buf[2*head + 1],
> > > +                     execlists->active);
> > > +
> > > +           if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > > +                         GEN8_CTX_STATUS_PREEMPTED))
> > > +                   execlists_set_active(execlists,
> > > +                                        EXECLISTS_ACTIVE_HWACK);
> > > +           if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > > +                   execlists_clear_active(execlists,
> > > +                                          EXECLISTS_ACTIVE_HWACK);
> > > +
> > > +           if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > > +                   continue;
> > >  
> > > -                   head = execlists->csb_head;
> > > -                   tail = READ_ONCE(buf[write_idx]);
> > > -           }
> > > -           GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > > -                     engine->name,
> > > -                     head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > > -                     tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > > +           /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > > +           GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> > >  
> > > -           while (head != tail) {
> > > -                   struct i915_request *rq;
> > > -                   unsigned int status;
> > > -                   unsigned int count;
> > > +           if (status & GEN8_CTX_STATUS_COMPLETE &&
> > > +               buf[2*head + 1] == execlists->preempt_complete_status) {
> > > +                   GEM_TRACE("%s preempt-idle\n", engine->name);
> > > +                   complete_preempt_context(execlists);
> > > +                   continue;
> > > +           }
> > >  
> > > -                   if (++head == GEN8_CSB_ENTRIES)
> > > -                           head = 0;
> > > +           if (status & GEN8_CTX_STATUS_PREEMPTED &&
> > > +               execlists_is_active(execlists,
> > > +                                   EXECLISTS_ACTIVE_PREEMPT))
> > > +                   continue;
> > >  
> > > -                   /* We are flying near dragons again.
> > > -                    *
> > > -                    * We hold a reference to the request in execlist_port[]
> > > -                    * but no more than that. We are operating in softirq
> > > -                    * context and so cannot hold any mutex or sleep. That
> > > -                    * prevents us stopping the requests we are processing
> > > -                    * in port[] from being retired simultaneously (the
> > > -                    * breadcrumb will be complete before we see the
> > > -                    * context-switch). As we only hold the reference to the
> > > -                    * request, any pointer chasing underneath the request
> > > -                    * is subject to a potential use-after-free. Thus we
> > > -                    * store all of the bookkeeping within port[] as
> > > -                    * required, and avoid using unguarded pointers beneath
> > > -                    * request itself. The same applies to the atomic
> > > -                    * status notifier.
> > > -                    */
> > > +           GEM_BUG_ON(!execlists_is_active(execlists,
> > > +                                           EXECLISTS_ACTIVE_USER));
> > >  
> > > -                   status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> > > -                   GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> > > -                             engine->name, head,
> > > -                             status, buf[2*head + 1],
> > > -                             execlists->active);
> > > -
> > > -                   if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > > -                                 GEN8_CTX_STATUS_PREEMPTED))
> > > -                           execlists_set_active(execlists,
> > > -                                                EXECLISTS_ACTIVE_HWACK);
> > > -                   if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > > -                           execlists_clear_active(execlists,
> > > -                                                  EXECLISTS_ACTIVE_HWACK);
> > > -
> > > -                   if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > > -                           continue;
> > > +           rq = port_unpack(port, &count);
> > > +           GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> > > +                     engine->name,
> > > +                     port->context_id, count,
> > > +                     rq ? rq->global_seqno : 0,
> > > +                     rq ? rq_prio(rq) : 0);
> > > +
> > > +           /* Check the context/desc id for this event matches */
> > > +           GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> > > +
> > > +           GEM_BUG_ON(count == 0);
> > > +           if (--count == 0) {
> > > +                   GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> > > +                   GEM_BUG_ON(port_isset(&port[1]) &&
> > > +                              !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> > > +                   GEM_BUG_ON(!i915_request_completed(rq));
> > > +                   execlists_context_schedule_out(rq);
> > > +                   trace_i915_request_out(rq);
> > > +                   i915_request_put(rq);
> > > +
> > > +                   GEM_TRACE("%s completed ctx=%d\n",
> > > +                             engine->name, port->context_id);
> > > +
> > > +                   execlists_port_complete(execlists, port);
> > > +           } else {
> > > +                   port_set(port, port_pack(rq, count));
> > > +           }
> > >  
> > > -                   /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > > -                   GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> > > +           /* After the final element, the hw should be idle */
> > > +           GEM_BUG_ON(port_count(port) == 0 &&
> > > +                      !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> > > +           if (port_count(port) == 0)
> > > +                   execlists_clear_active(execlists,
> > > +                                          EXECLISTS_ACTIVE_USER);
> > > +   }
> > >  
> > > -                   if (status & GEN8_CTX_STATUS_COMPLETE &&
> > > -                       buf[2*head + 1] == execlists->preempt_complete_status) {
> > > -                           GEM_TRACE("%s preempt-idle\n", engine->name);
> > > -                           complete_preempt_context(execlists);
> > > -                           continue;
> > > -                   }
> > > +   if (head != execlists->csb_head) {
> > > +           execlists->csb_head = head;
> > > +           writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > > +                  i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > +   }
> > >  
> > > -                   if (status & GEN8_CTX_STATUS_PREEMPTED &&
> > > -                       execlists_is_active(execlists,
> > > -                                           EXECLISTS_ACTIVE_PREEMPT))
> > > -                           continue;
> > > +   if (unlikely(fw))
> > > +           intel_uncore_forcewake_put(i915, execlists->fw_domains);
> > > +}
> > >  
> > > -                   GEM_BUG_ON(!execlists_is_active(execlists,
> > > -                                                   EXECLISTS_ACTIVE_USER));
> > > -
> > > -                   rq = port_unpack(port, &count);
> > > -                   GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> > > -                             engine->name,
> > > -                             port->context_id, count,
> > > -                             rq ? rq->global_seqno : 0,
> > > -                             rq ? rq_prio(rq) : 0);
> > > -
> > > -                   /* Check the context/desc id for this event matches */
> > > -                   GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> > > -
> > > -                   GEM_BUG_ON(count == 0);
> > > -                   if (--count == 0) {
> > > -                           GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> > > -                           GEM_BUG_ON(port_isset(&port[1]) &&
> > > -                                      !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> > > -                           GEM_BUG_ON(!i915_request_completed(rq));
> > > -                           execlists_context_schedule_out(rq);
> > > -                           trace_i915_request_out(rq);
> > > -                           i915_request_put(rq);
> > > -
> > > -                           GEM_TRACE("%s completed ctx=%d\n",
> > > -                                     engine->name, port->context_id);
> > > -
> > > -                           execlists_port_complete(execlists, port);
> > > -                   } else {
> > > -                           port_set(port, port_pack(rq, count));
> > > -                   }
> > > +/*
> > > + * Check the unread Context Status Buffers and manage the submission of new
> > > + * contexts to the ELSP accordingly.
> > > + */
> > > +static void execlists_submission_tasklet(unsigned long data)
> > > +{
> > > +   struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> > >  
> > > -                   /* After the final element, the hw should be idle */
> > > -                   GEM_BUG_ON(port_count(port) == 0 &&
> > > -                              !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> > > -                   if (port_count(port) == 0)
> > > -                           execlists_clear_active(execlists,
> > > -                                                  EXECLISTS_ACTIVE_USER);
> > > -           }
> > > +   /*
> > > +    * We can skip acquiring intel_runtime_pm_get() here as it was taken
> > > +    * on our behalf by the request (see i915_gem_mark_busy()) and it will
> > > +    * not be relinquished until the device is idle (see
> > > +    * i915_gem_idle_work_handler()). As a precaution, we make sure
> > > +    * that all ELSP are drained i.e. we have processed the CSB,
> > > +    * before allowing ourselves to idle and calling intel_runtime_pm_put().
> > > +    */
> > > +   GEM_BUG_ON(!engine->i915->gt.awake);
> > >  
> > > -           if (head != execlists->csb_head) {
> > > -                   execlists->csb_head = head;
> > > -                   writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > > -                          dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > -           }
> > > -   }
> > > +   /*
> > > +    * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> > > +    * imposing the cost of a locked atomic transaction when submitting a
> > > +    * new request (outside of the context-switch interrupt).
> > > +    */
> > > +   if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> > This was a 'while' before the refactoring. Shouldn't it still be one, in
> > order to catch new irq_posted events during processing?
> > 
> > BTW, I have revised and rebased the force preemption patches on drm-tip with
> > this and the other related patches you have proposed. I just started running
> > my IGT test that covers the innocent-context-on-port[1] scenario that we
> > discussed on IRC. I'm getting a sporadic failure but need more time to
> > root-cause it.
> > Will update soon.
> > 
> > > +           process_csb(engine);
> > >  
> > > -   if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> > > +   if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> > >             execlists_dequeue(engine);
> > > -
> > > -   if (fw)
> > > -           intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> > >  }
> > >  
> > >  static void queue_request(struct intel_engine_cs *engine,
> > > @@ -1667,6 +1673,7 @@ static struct i915_request *
> > >  execlists_reset_prepare(struct intel_engine_cs *engine)
> > >  {
> > >     struct intel_engine_execlists * const execlists = &engine->execlists;
> > > +   struct i915_request *request, *active;
> > >  
> > >     /*
> > >      * Prevent request submission to the hardware until we have
> > > @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> > >             tasklet_kill(&execlists->tasklet);
> > >     tasklet_disable(&execlists->tasklet);
> > >  
> > > -   return i915_gem_find_active_request(engine);
> > > +   /*
> > > +    * We want to flush the pending context switches; having disabled
> > > +    * the tasklet above, we can assume exclusive access to the execlists.
> > > +    * This allows us to catch up with an in-flight preemption event,
> > > +    * and avoid blaming an innocent request if the stall was due to the
> > > +    * preemption itself.
> > > +    */
> > > +   process_csb(engine);
> > > +
> > > +   /*
> > > +    * The last active request can then be no later than the last request
> > > +    * now in ELSP[0]. So search backwards from there, so that if the GPU
> > > +    * has advanced beyond the last CSB update, it will be pardoned.
> > > +    */
> > > +   active = NULL;
> > > +   request = port_request(execlists->port);
> > > +   if (request) {
> > > +           unsigned long flags;
> > > +
> > > +           spin_lock_irqsave(&engine->timeline->lock, flags);
> > > +           list_for_each_entry_from_reverse(request,
> > > +                                            &engine->timeline->requests,
> > > +                                            link) {
> > > +                   if (__i915_request_completed(request,
> > > +                                                request->global_seqno))
> > > +                           break;
> > > +
> > > +                   active = request;
> > > +           }
> > > +           spin_unlock_irqrestore(&engine->timeline->lock, flags);
> > > +   }
> > > +
> > > +   return active;
> 
> Now we can see the reset aborted if process_csb() processes a completed
> preemption. So we need to kick the tasklet to get the waiting request
> dequeued. Currently we only kick it, when needed, from gen8_init_common_ring(),
> which is skipped if we abort the reset.

Yes, it is imperative that the tasklet be disabled and process_csb() be
manually flushed in the preemption-timeout path (because of ksoftirqd we
may not have run the submission tasklet at all before the timer expires).
Hence the desire to split out process_csb().
-Chris
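
The pardon logic discussed above is worth making concrete: once process_csb()
has caught up, reset-prepare walks the timeline backwards from the request in
ELSP[0] and blames the oldest request that has not yet completed, so anything
the GPU already got past is pardoned. A standalone model of that walk; the
request type and timeline array are invented, and only the direction of the
search and the break-on-completed rule follow the patch:

#include <stdio.h>

struct request {
        int seqno;
        int completed; /* 1 once the GPU passed its breadcrumb */
};

/* Model of the list_for_each_entry_from_reverse() walk in
 * execlists_reset_prepare(): start at the request in ELSP[0]
 * (the newest submitted) and search towards older requests;
 * the oldest still-incomplete request is the one to blame. */
static struct request *find_active(struct request *tl, int elsp0)
{
        struct request *active = NULL;
        int i;

        for (i = elsp0; i >= 0; i--) {
                if (tl[i].completed)
                        break; /* GPU advanced past here: pardon */
                active = &tl[i];
        }
        return active;
}

int main(void)
{
        /* Seqnos 1..4 in submission order; ELSP[0] holds #4, but
         * the GPU has already completed #1 and #2. */
        struct request tl[] = {
                { 1, 1 }, { 2, 1 }, { 3, 0 }, { 4, 0 },
        };
        struct request *rq = find_active(tl, 3);

        printf("blame seqno %d\n", rq ? rq->seqno : -1); /* 3 */
        return 0;
}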

end of thread

Thread overview: 19+ messages
2018-03-20  0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
2018-03-20  0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
2018-03-22 15:26   ` Jeff McGee
2018-03-20  0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
2018-03-22 15:28   ` Jeff McGee
2018-03-20  0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset preparations Chris Wilson
2018-03-22 15:28   ` Jeff McGee
2018-03-20  0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
2018-03-21  0:17   ` Jeff McGee
2018-03-22 15:14     ` Jeff McGee
2018-03-26 11:28       ` Chris Wilson
2018-03-20  0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
2018-03-20  0:44   ` Chris Wilson
2018-03-20  0:56     ` Michel Thierry
2018-03-20  1:09       ` Chris Wilson
2018-03-20  1:24 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] " Patchwork
2018-03-20  1:25 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-03-20  1:41 ` ✓ Fi.CI.BAT: success " Patchwork
2018-03-20  6:45 ` ✗ Fi.CI.IGT: failure " Patchwork
