* [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
@ 2018-03-20 0:18 Chris Wilson
2018-03-20 0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
` (8 more replies)
0 siblings, 9 replies; 19+ messages in thread
From: Chris Wilson @ 2018-03-20 0:18 UTC (permalink / raw)
To: intel-gfx; +Cc: Mika Kuoppala
Not all callers want the GPU error to be handled in the same way, so expose
a control parameter. In the first instance, some callers do not want the
heavyweight error capture so add a bit to request the state to be
captured and saved.
v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
reason for the reset in the dev_notice(), superseding the earlier option
to not print that notice.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 4 +--
drivers/gpu/drm/i915/i915_drv.c | 17 +++++------
drivers/gpu/drm/i915/i915_drv.h | 10 +++---
drivers/gpu/drm/i915/i915_irq.c | 39 +++++++++++++-----------
drivers/gpu/drm/i915/intel_hangcheck.c | 13 ++++----
drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
6 files changed, 48 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 964ea1a12357..7816cd53100a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4011,8 +4011,8 @@ i915_wedged_set(void *data, u64 val)
engine->hangcheck.stalled = true;
}
- i915_handle_error(i915, val, "Manually set wedged engine mask = %llx",
- val);
+ i915_handle_error(i915, val, I915_ERROR_CAPTURE,
+ "Manually set wedged engine mask = %llx", val);
wait_on_bit(&i915->gpu_error.flags,
I915_RESET_HANDOFF,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1021bf40e236..204fa07e8f79 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1869,7 +1869,7 @@ static int i915_resume_switcheroo(struct drm_device *dev)
/**
* i915_reset - reset chip after a hang
* @i915: #drm_i915_private to reset
- * @flags: Instructions
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
*
* Reset the chip. Useful if a hang is detected. Marks the device as wedged
* on failure.
@@ -1884,7 +1884,7 @@ static int i915_resume_switcheroo(struct drm_device *dev)
* - re-init interrupt state
* - re-init display
*/
-void i915_reset(struct drm_i915_private *i915, unsigned int flags)
+void i915_reset(struct drm_i915_private *i915, const char *msg)
{
struct i915_gpu_error *error = &i915->gpu_error;
int ret;
@@ -1901,8 +1901,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
if (!i915_gem_unset_wedged(i915))
goto wakeup;
- if (!(flags & I915_RESET_QUIET))
- dev_notice(i915->drm.dev, "Resetting chip after gpu hang\n");
+ if (msg)
+ dev_notice(i915->drm.dev, "Resetting chip for %s\n", msg);
error->reset_count++;
disable_irq(i915->drm.irq);
@@ -2003,7 +2003,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
/**
* i915_reset_engine - reset GPU engine to recover from a hang
* @engine: engine to reset
- * @flags: options
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
@@ -2013,7 +2013,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
* - reset engine (which will force the engine to idle)
* - re-init/configure engine
*/
-int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
+int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
{
struct i915_gpu_error *error = &engine->i915->gpu_error;
struct i915_request *active_request;
@@ -2028,10 +2028,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
goto out;
}
- if (!(flags & I915_RESET_QUIET)) {
+ if (msg)
dev_notice(engine->i915->drm.dev,
- "Resetting %s after gpu hang\n", engine->name);
- }
+ "Resetting %s for %s\n", engine->name, msg);
error->reset_engine_count[engine->id]++;
if (!engine->i915->guc.execbuf_client)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e27ba8fb64e6..29ef6c16bbe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2700,10 +2700,8 @@ extern void i915_driver_unload(struct drm_device *dev);
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
-#define I915_RESET_QUIET BIT(0)
-extern void i915_reset(struct drm_i915_private *i915, unsigned int flags);
-extern int i915_reset_engine(struct intel_engine_cs *engine,
- unsigned int flags);
+extern void i915_reset(struct drm_i915_private *i915, const char *msg);
+extern int i915_reset_engine(struct intel_engine_cs *engine, const char *msg);
extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
extern int intel_reset_guc(struct drm_i915_private *dev_priv);
@@ -2751,10 +2749,12 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
&dev_priv->gpu_error.hangcheck_work, delay);
}
-__printf(3, 4)
+__printf(4, 5)
void i915_handle_error(struct drm_i915_private *dev_priv,
u32 engine_mask,
+ unsigned long flags,
const char *fmt, ...);
+#define I915_ERROR_CAPTURE BIT(0)
extern void intel_irq_init(struct drm_i915_private *dev_priv);
extern void intel_irq_fini(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 44eef355e12c..dbdb11ec38f6 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2877,14 +2877,8 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
return IRQ_HANDLED;
}
-/**
- * i915_reset_device - do process context error handling work
- * @dev_priv: i915 device private
- *
- * Fire an error uevent so userspace can see that a hang or error
- * was detected.
- */
-static void i915_reset_device(struct drm_i915_private *dev_priv)
+static void i915_reset_device(struct drm_i915_private *dev_priv,
+ const char *msg)
{
struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
@@ -2910,7 +2904,7 @@ static void i915_reset_device(struct drm_i915_private *dev_priv)
*/
do {
if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
- i915_reset(dev_priv, 0);
+ i915_reset(dev_priv, msg);
mutex_unlock(&dev_priv->drm.struct_mutex);
}
} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
@@ -2955,6 +2949,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
* i915_handle_error - handle a gpu error
* @dev_priv: i915 device private
* @engine_mask: mask representing engines that are hung
+ * @flags: control flags
* @fmt: Error message format string
*
* Do some basic checking of register state at error time and
@@ -2965,16 +2960,23 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
*/
void i915_handle_error(struct drm_i915_private *dev_priv,
u32 engine_mask,
+ unsigned long flags,
const char *fmt, ...)
{
struct intel_engine_cs *engine;
unsigned int tmp;
- va_list args;
char error_msg[80];
+ char *msg = NULL;
+
+ if (fmt) {
+ va_list args;
- va_start(args, fmt);
- vscnprintf(error_msg, sizeof(error_msg), fmt, args);
- va_end(args);
+ va_start(args, fmt);
+ vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+ va_end(args);
+
+ msg = error_msg;
+ }
/*
* In most cases it's guaranteed that we get here with an RPM
@@ -2986,8 +2988,11 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
intel_runtime_pm_get(dev_priv);
engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
- i915_capture_error_state(dev_priv, engine_mask, error_msg);
- i915_clear_error_registers(dev_priv);
+
+ if (flags & I915_ERROR_CAPTURE) {
+ i915_capture_error_state(dev_priv, engine_mask, msg);
+ i915_clear_error_registers(dev_priv);
+ }
/*
* Try engine reset when available. We fall back to full reset if
@@ -3000,7 +3005,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
&dev_priv->gpu_error.flags))
continue;
- if (i915_reset_engine(engine, 0) == 0)
+ if (i915_reset_engine(engine, msg) == 0)
engine_mask &= ~intel_engine_flag(engine);
clear_bit(I915_RESET_ENGINE + engine->id,
@@ -3030,7 +3035,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
TASK_UNINTERRUPTIBLE);
}
- i915_reset_device(dev_priv);
+ i915_reset_device(dev_priv, msg);
for_each_engine(engine, dev_priv, tmp) {
clear_bit(I915_RESET_ENGINE + engine->id,
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 42e45ae87393..fd0ffb8328d0 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
*/
tmp = I915_READ_CTL(engine);
if (tmp & RING_WAIT) {
- i915_handle_error(dev_priv, 0,
- "Kicking stuck wait on %s",
- engine->name);
+ i915_handle_error(dev_priv, BIT(engine->id), 0,
+ "stuck wait on %s", engine->name);
I915_WRITE_CTL(engine, tmp);
return ENGINE_WAIT_KICK;
}
@@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
default:
return ENGINE_DEAD;
case 1:
- i915_handle_error(dev_priv, 0,
- "Kicking stuck semaphore on %s",
+ i915_handle_error(dev_priv, ALL_ENGINES, 0,
+ "stuck semaphore on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);
return ENGINE_WAIT_KICK;
@@ -386,13 +385,13 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915,
if (stuck != hung)
hung &= ~stuck;
len = scnprintf(msg, sizeof(msg),
- "%s on ", stuck == hung ? "No progress" : "Hang");
+ "%s on ", stuck == hung ? "no progress" : "hang");
for_each_engine_masked(engine, i915, hung, tmp)
len += scnprintf(msg + len, sizeof(msg) - len,
"%s, ", engine->name);
msg[len-2] = '\0';
- return i915_handle_error(i915, hung, "%s", msg);
+ return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg);
}
/*
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index df7898c8edcb..12682d985b9f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -433,7 +433,7 @@ static int igt_global_reset(void *arg)
mutex_lock(&i915->drm.struct_mutex);
reset_count = i915_reset_count(&i915->gpu_error);
- i915_reset(i915, I915_RESET_QUIET);
+ i915_reset(i915, NULL);
if (i915_reset_count(&i915->gpu_error) == reset_count) {
pr_err("No GPU reset recorded!\n");
@@ -518,7 +518,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
engine->hangcheck.seqno =
intel_engine_get_seqno(engine);
- err = i915_reset_engine(engine, I915_RESET_QUIET);
+ err = i915_reset_engine(engine, NULL);
if (err) {
pr_err("i915_reset_engine failed\n");
break;
@@ -725,7 +725,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
engine->hangcheck.seqno =
intel_engine_get_seqno(engine);
- err = i915_reset_engine(engine, I915_RESET_QUIET);
+ err = i915_reset_engine(engine, NULL);
if (err) {
pr_err("i915_reset_engine(%s:%s) failed, err=%d\n",
engine->name, active ? "active" : "idle", err);
@@ -865,7 +865,6 @@ static int igt_wait_reset(void *arg)
__func__, rq->fence.seqno, hws_seqno(&h, rq));
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
- i915_reset(i915, 0);
i915_gem_set_wedged(i915);
err = -EIO;
@@ -962,7 +961,6 @@ static int igt_reset_queue(void *arg)
i915_request_put(rq);
i915_request_put(prev);
- i915_reset(i915, 0);
i915_gem_set_wedged(i915);
err = -EIO;
@@ -971,7 +969,7 @@ static int igt_reset_queue(void *arg)
reset_count = fake_hangcheck(prev);
- i915_reset(i915, I915_RESET_QUIET);
+ i915_reset(i915, NULL);
GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
&i915->gpu_error.flags));
@@ -1069,7 +1067,6 @@ static int igt_handle_error(void *arg)
__func__, rq->fence.seqno, hws_seqno(&h, rq));
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
- i915_reset(i915, 0);
i915_gem_set_wedged(i915);
err = -EIO;
@@ -1084,7 +1081,7 @@ static int igt_handle_error(void *arg)
engine->hangcheck.stalled = true;
engine->hangcheck.seqno = intel_engine_get_seqno(engine);
- i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);
+ i915_handle_error(i915, intel_engine_flag(engine), 0, NULL);
xchg(&i915->gpu_error.first_error, error);
--
2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context()
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
@ 2018-03-20 0:18 ` Chris Wilson
2018-03-22 15:26 ` Jeff McGee
2018-03-20 0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
` (7 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20 0:18 UTC (permalink / raw)
To: intel-gfx
As a complement to inject_preempt_context(), follow up with the function
to handle its completion. This will be useful should we wish to extend
the duties of the preempt-context for execlists.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 53f1c009ed7b..0bfaeb56b8c7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -531,8 +531,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
if (execlists->ctrl_reg)
writel(EL_CTRL_LOAD, execlists->ctrl_reg);
- execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
- execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
+static void complete_preempt_context(struct intel_engine_execlists *execlists)
+{
+ execlists_cancel_port_requests(execlists);
+ execlists_unwind_incomplete_requests(execlists);
+
+ GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
}
static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -939,14 +948,7 @@ static void execlists_submission_tasklet(unsigned long data)
if (status & GEN8_CTX_STATUS_COMPLETE &&
buf[2*head + 1] == execlists->preempt_complete_status) {
GEM_TRACE("%s preempt-idle\n", engine->name);
-
- execlists_cancel_port_requests(execlists);
- execlists_unwind_incomplete_requests(execlists);
-
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT));
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT);
+ complete_preempt_context(execlists);
continue;
}
--
2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
2018-03-20 0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
@ 2018-03-20 0:18 ` Chris Wilson
2018-03-22 15:28 ` Jeff McGee
2018-03-20 0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset preparations Chris Wilson
` (6 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20 0:18 UTC (permalink / raw)
To: intel-gfx
In preparation for more carefully handling incomplete preemption during
reset by execlists, we move the existing code wholesale to the backends
under a couple of new reset vfuncs.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 42 ++++----------------------
drivers/gpu/drm/i915/intel_lrc.c | 52 +++++++++++++++++++++++++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++++--
4 files changed, 78 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 802df8e1a544..38f7160d99c9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2917,7 +2917,7 @@ static bool engine_stalled(struct intel_engine_cs *engine)
struct i915_request *
i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{
- struct i915_request *request = NULL;
+ struct i915_request *request;
/*
* During the reset sequence, we must prevent the engine from
@@ -2940,40 +2940,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
*/
kthread_park(engine->breadcrumbs.signaler);
- /*
- * Prevent request submission to the hardware until we have
- * completed the reset in i915_gem_reset_finish(). If a request
- * is completed by one engine, it may then queue a request
- * to a second via its execlists->tasklet *just* as we are
- * calling engine->init_hw() and also writing the ELSP.
- * Turning off the execlists->tasklet until the reset is over
- * prevents the race.
- *
- * Note that this needs to be a single atomic operation on the
- * tasklet (flush existing tasks, prevent new tasks) to prevent
- * a race between reset and set-wedged. It is not, so we do the best
- * we can atm and make sure we don't lock the machine up in the more
- * common case of recursively being called from set-wedged from inside
- * i915_reset.
- */
- if (!atomic_read(&engine->execlists.tasklet.count))
- tasklet_kill(&engine->execlists.tasklet);
- tasklet_disable(&engine->execlists.tasklet);
-
- /*
- * We're using worker to queue preemption requests from the tasklet in
- * GuC submission mode.
- * Even though tasklet was disabled, we may still have a worker queued.
- * Let's make sure that all workers scheduled before disabling the
- * tasklet are completed before continuing with the reset.
- */
- if (engine->i915->guc.preempt_wq)
- flush_workqueue(engine->i915->guc.preempt_wq);
-
- if (engine->irq_seqno_barrier)
- engine->irq_seqno_barrier(engine);
-
- request = i915_gem_find_active_request(engine);
+ request = engine->reset.prepare(engine);
if (request && request->fence.error == -EIO)
request = ERR_PTR(-EIO); /* Previous reset failed! */
@@ -3120,7 +3087,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
}
/* Setup the CS to resume from the breadcrumb of the hung request */
- engine->reset_hw(engine, request);
+ engine->reset.reset(engine, request);
}
void i915_gem_reset(struct drm_i915_private *dev_priv)
@@ -3172,7 +3139,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
{
- tasklet_enable(&engine->execlists.tasklet);
+ engine->reset.finish(engine);
+
kthread_unpark(engine->breadcrumbs.signaler);
intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0bfaeb56b8c7..f662a9524233 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1663,6 +1663,44 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
return init_workarounds_ring(engine);
}
+static struct i915_request *
+execlists_reset_prepare(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+
+ /*
+ * Prevent request submission to the hardware until we have
+ * completed the reset in i915_gem_reset_finish(). If a request
+ * is completed by one engine, it may then queue a request
+ * to a second via its execlists->tasklet *just* as we are
+ * calling engine->init_hw() and also writing the ELSP.
+ * Turning off the execlists->tasklet until the reset is over
+ * prevents the race.
+ *
+ * Note that this needs to be a single atomic operation on the
+ * tasklet (flush existing tasks, prevent new tasks) to prevent
+ * a race between reset and set-wedged. It is not, so we do the best
+ * we can atm and make sure we don't lock the machine up in the more
+ * common case of recursively being called from set-wedged from inside
+ * i915_reset.
+ */
+ if (!atomic_read(&execlists->tasklet.count))
+ tasklet_kill(&execlists->tasklet);
+ tasklet_disable(&execlists->tasklet);
+
+ /*
+ * We're using worker to queue preemption requests from the tasklet in
+ * GuC submission mode.
+ * Even though tasklet was disabled, we may still have a worker queued.
+ * Let's make sure that all workers scheduled before disabling the
+ * tasklet are completed before continuing with the reset.
+ */
+ if (engine->i915->guc.preempt_wq)
+ flush_workqueue(engine->i915->guc.preempt_wq);
+
+ return i915_gem_find_active_request(engine);
+}
+
static void reset_irq(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1692,8 +1730,8 @@ static void reset_irq(struct intel_engine_cs *engine)
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
}
-static void reset_common_ring(struct intel_engine_cs *engine,
- struct i915_request *request)
+static void execlists_reset(struct intel_engine_cs *engine,
+ struct i915_request *request)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
@@ -1766,6 +1804,11 @@ static void reset_common_ring(struct intel_engine_cs *engine,
unwind_wa_tail(request);
}
+static void execlists_reset_finish(struct intel_engine_cs *engine)
+{
+ tasklet_enable(&engine->execlists.tasklet);
+}
+
static int intel_logical_ring_emit_pdps(struct i915_request *rq)
{
struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
@@ -2090,7 +2133,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overriden by each engine. */
engine->init_hw = gen8_init_common_ring;
- engine->reset_hw = reset_common_ring;
+
+ engine->reset.prepare = execlists_reset_prepare;
+ engine->reset.reset = execlists_reset;
+ engine->reset.finish = execlists_reset_finish;
engine->context_pin = execlists_context_pin;
engine->context_unpin = execlists_context_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 04d9d9a946a7..eebcc877ef60 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -530,8 +530,16 @@ static int init_ring_common(struct intel_engine_cs *engine)
return ret;
}
-static void reset_ring_common(struct intel_engine_cs *engine,
- struct i915_request *request)
+static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+{
+ if (engine->irq_seqno_barrier)
+ engine->irq_seqno_barrier(engine);
+
+ return i915_gem_find_active_request(engine);
+}
+
+static void reset_ring(struct intel_engine_cs *engine,
+ struct i915_request *request)
{
/*
* RC6 must be prevented until the reset is complete and the engine
@@ -595,6 +603,10 @@ static void reset_ring_common(struct intel_engine_cs *engine,
}
}
+static void reset_finish(struct intel_engine_cs *engine)
+{
+}
+
static int intel_rcs_ctx_init(struct i915_request *rq)
{
int ret;
@@ -1987,7 +1999,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
intel_ring_init_semaphores(dev_priv, engine);
engine->init_hw = init_ring_common;
- engine->reset_hw = reset_ring_common;
+ engine->reset.prepare = reset_prepare;
+ engine->reset.reset = reset_ring;
+ engine->reset.finish = reset_finish;
engine->context_pin = intel_ring_context_pin;
engine->context_unpin = intel_ring_context_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1f50727a5ddb..e2681303ce21 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -418,8 +418,13 @@ struct intel_engine_cs {
void (*irq_disable)(struct intel_engine_cs *engine);
int (*init_hw)(struct intel_engine_cs *engine);
- void (*reset_hw)(struct intel_engine_cs *engine,
- struct i915_request *rq);
+
+ struct {
+ struct i915_request *(*prepare)(struct intel_engine_cs *engine);
+ void (*reset)(struct intel_engine_cs *engine,
+ struct i915_request *rq);
+ void (*finish)(struct intel_engine_cs *engine);
+ } reset;
void (*park)(struct intel_engine_cs *engine);
void (*unpark)(struct intel_engine_cs *engine);
--
2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 4/5] drm/i915: Split execlists/guc reset preparations
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
2018-03-20 0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
2018-03-20 0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
@ 2018-03-20 0:18 ` Chris Wilson
2018-03-22 15:28 ` Jeff McGee
2018-03-20 0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
` (5 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20 0:18 UTC (permalink / raw)
To: intel-gfx
In the next patch, we will make the execlists reset prepare callback
take into account preemption by flushing the context-switch handler.
This is not applicable to the GuC submission backend, so split the two
into their own backend callbacks.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
drivers/gpu/drm/i915/intel_guc_submission.c | 39 +++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 11 +-------
2 files changed, 40 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 207cda062626..779c8d3156e5 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -776,6 +776,44 @@ static void guc_submission_tasklet(unsigned long data)
guc_dequeue(engine);
}
+static struct i915_request *
+guc_reset_prepare(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+
+ /*
+ * Prevent request submission to the hardware until we have
+ * completed the reset in i915_gem_reset_finish(). If a request
+ * is completed by one engine, it may then queue a request
+ * to a second via its execlists->tasklet *just* as we are
+ * calling engine->init_hw() and also writing the ELSP.
+ * Turning off the execlists->tasklet until the reset is over
+ * prevents the race.
+ *
+ * Note that this needs to be a single atomic operation on the
+ * tasklet (flush existing tasks, prevent new tasks) to prevent
+ * a race between reset and set-wedged. It is not, so we do the best
+ * we can atm and make sure we don't lock the machine up in the more
+ * common case of recursively being called from set-wedged from inside
+ * i915_reset.
+ */
+ if (!atomic_read(&execlists->tasklet.count))
+ tasklet_kill(&execlists->tasklet);
+ tasklet_disable(&execlists->tasklet);
+
+ /*
+ * We're using worker to queue preemption requests from the tasklet in
+ * GuC submission mode.
+ * Even though tasklet was disabled, we may still have a worker queued.
+ * Let's make sure that all workers scheduled before disabling the
+ * tasklet are completed before continuing with the reset.
+ */
+ if (engine->i915->guc.preempt_wq)
+ flush_workqueue(engine->i915->guc.preempt_wq);
+
+ return i915_gem_find_active_request(engine);
+}
+
/*
* Everything below here is concerned with setup & teardown, and is
* therefore not part of the somewhat time-critical batch-submission
@@ -1235,6 +1273,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
&engine->execlists;
execlists->tasklet.func = guc_submission_tasklet;
+ engine->reset.prepare = guc_reset_prepare;
engine->park = guc_submission_park;
engine->unpark = guc_submission_unpark;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f662a9524233..e5a3ffbc273a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1688,16 +1688,6 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
tasklet_kill(&execlists->tasklet);
tasklet_disable(&execlists->tasklet);
- /*
- * We're using worker to queue preemption requests from the tasklet in
- * GuC submission mode.
- * Even though tasklet was disabled, we may still have a worker queued.
- * Let's make sure that all workers scheduled before disabling the
- * tasklet are completed before continuing with the reset.
- */
- if (engine->i915->guc.preempt_wq)
- flush_workqueue(engine->i915->guc.preempt_wq);
-
return i915_gem_find_active_request(engine);
}
@@ -2115,6 +2105,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
engine->cancel_requests = execlists_cancel_requests;
engine->schedule = execlists_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
+ engine->reset.prepare = execlists_reset_prepare;
engine->park = NULL;
engine->unpark = NULL;
--
2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
` (2 preceding siblings ...)
2018-03-20 0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset preparations Chris Wilson
@ 2018-03-20 0:18 ` Chris Wilson
2018-03-21 0:17 ` Jeff McGee
2018-03-20 0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
` (4 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20 0:18 UTC (permalink / raw)
To: intel-gfx
After disabling the tasklet, catch up with the in-flight CSB events
before deciding which request was truly guilty of hanging the GPU.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
1 file changed, 197 insertions(+), 158 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e5a3ffbc273a..beb81f13a3cc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
local_irq_restore(flags);
}
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void process_csb(struct intel_engine_cs *engine)
{
- struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port * const port = execlists->port;
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
+ unsigned int head, tail;
+ const u32 *buf;
bool fw = false;
- /* We can skip acquiring intel_runtime_pm_get() here as it was taken
- * on our behalf by the request (see i915_gem_mark_busy()) and it will
- * not be relinquished until the device is idle (see
- * i915_gem_idle_work_handler()). As a precaution, we make sure
- * that all ELSP are drained i.e. we have processed the CSB,
- * before allowing ourselves to idle and calling intel_runtime_pm_put().
- */
- GEM_BUG_ON(!dev_priv->gt.awake);
+ if (unlikely(execlists->csb_use_mmio)) {
+ buf = (u32 * __force)
+ (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+ execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+ } else {
+ /* The HWSP contains a (cacheable) mirror of the CSB */
+ buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ }
- /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
- * imposing the cost of a locked atomic transaction when submitting a
- * new request (outside of the context-switch interrupt).
+ /*
+ * The write will be ordered by the uncached read (itself
+ * a memory barrier), so we do not need another in the form
+ * of a locked instruction. The race between the interrupt
+ * handler and the split test/clear is harmless as we order
+ * our clear before the CSB read. If the interrupt arrived
+ * first between the test and the clear, we read the updated
+ * CSB and clear the bit. If the interrupt arrives as we read
+ * the CSB or later (i.e. after we had cleared the bit) the bit
+ * is set and we do a new loop.
*/
- while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
- /* The HWSP contains a (cacheable) mirror of the CSB */
- const u32 *buf =
- &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
- unsigned int head, tail;
-
- if (unlikely(execlists->csb_use_mmio)) {
- buf = (u32 * __force)
- (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
- execlists->csb_head = -1; /* force mmio read of CSB ptrs */
- }
+ __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+ intel_uncore_forcewake_get(i915, execlists->fw_domains);
+ fw = true;
+
+ head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ tail = GEN8_CSB_WRITE_PTR(head);
+ head = GEN8_CSB_READ_PTR(head);
+ execlists->csb_head = head;
+ } else {
+ const int write_idx =
+ intel_hws_csb_write_index(i915) -
+ I915_HWS_CSB_BUF0_INDEX;
+
+ head = execlists->csb_head;
+ tail = READ_ONCE(buf[write_idx]);
+ }
+ GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+ engine->name,
+ head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+ tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+
+ while (head != tail) {
+ struct i915_request *rq;
+ unsigned int status;
+ unsigned int count;
+
+ if (++head == GEN8_CSB_ENTRIES)
+ head = 0;
- /* The write will be ordered by the uncached read (itself
- * a memory barrier), so we do not need another in the form
- * of a locked instruction. The race between the interrupt
- * handler and the split test/clear is harmless as we order
- * our clear before the CSB read. If the interrupt arrived
- * first between the test and the clear, we read the updated
- * CSB and clear the bit. If the interrupt arrives as we read
- * the CSB or later (i.e. after we had cleared the bit) the bit
- * is set and we do a new loop.
+ /*
+ * We are flying near dragons again.
+ *
+ * We hold a reference to the request in execlist_port[]
+ * but no more than that. We are operating in softirq
+ * context and so cannot hold any mutex or sleep. That
+ * prevents us stopping the requests we are processing
+ * in port[] from being retired simultaneously (the
+ * breadcrumb will be complete before we see the
+ * context-switch). As we only hold the reference to the
+ * request, any pointer chasing underneath the request
+ * is subject to a potential use-after-free. Thus we
+ * store all of the bookkeeping within port[] as
+ * required, and avoid using unguarded pointers beneath
+ * request itself. The same applies to the atomic
+ * status notifier.
*/
- __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- if (unlikely(execlists->csb_head == -1)) { /* following a reset */
- if (!fw) {
- intel_uncore_forcewake_get(dev_priv,
- execlists->fw_domains);
- fw = true;
- }
- head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
- tail = GEN8_CSB_WRITE_PTR(head);
- head = GEN8_CSB_READ_PTR(head);
- execlists->csb_head = head;
- } else {
- const int write_idx =
- intel_hws_csb_write_index(dev_priv) -
- I915_HWS_CSB_BUF0_INDEX;
+ status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+ engine->name, head,
+ status, buf[2*head + 1],
+ execlists->active);
+
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
- head = execlists->csb_head;
- tail = READ_ONCE(buf[write_idx]);
- }
- GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
- engine->name,
- head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
- tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- while (head != tail) {
- struct i915_request *rq;
- unsigned int status;
- unsigned int count;
+ if (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status) {
+ GEM_TRACE("%s preempt-idle\n", engine->name);
+ complete_preempt_context(execlists);
+ continue;
+ }
- if (++head == GEN8_CSB_ENTRIES)
- head = 0;
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT))
+ continue;
- /* We are flying near dragons again.
- *
- * We hold a reference to the request in execlist_port[]
- * but no more than that. We are operating in softirq
- * context and so cannot hold any mutex or sleep. That
- * prevents us stopping the requests we are processing
- * in port[] from being retired simultaneously (the
- * breadcrumb will be complete before we see the
- * context-switch). As we only hold the reference to the
- * request, any pointer chasing underneath the request
- * is subject to a potential use-after-free. Thus we
- * store all of the bookkeeping within port[] as
- * required, and avoid using unguarded pointers beneath
- * request itself. The same applies to the atomic
- * status notifier.
- */
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_USER));
- status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
- engine->name, head,
- status, buf[2*head + 1],
- execlists->active);
-
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ rq = port_unpack(port, &count);
+ GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+ engine->name,
+ port->context_id, count,
+ rq ? rq->global_seqno : 0,
+ rq ? rq_prio(rq) : 0);
+
+ /* Check the context/desc id for this event matches */
+ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+ GEM_BUG_ON(count == 0);
+ if (--count == 0) {
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+ GEM_BUG_ON(port_isset(&port[1]) &&
+ !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+ GEM_BUG_ON(!i915_request_completed(rq));
+ execlists_context_schedule_out(rq);
+ trace_i915_request_out(rq);
+ i915_request_put(rq);
+
+ GEM_TRACE("%s completed ctx=%d\n",
+ engine->name, port->context_id);
+
+ execlists_port_complete(execlists, port);
+ } else {
+ port_set(port, port_pack(rq, count));
+ }
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /* After the final element, the hw should be idle */
+ GEM_BUG_ON(port_count(port) == 0 &&
+ !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ if (port_count(port) == 0)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_USER);
+ }
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
- GEM_TRACE("%s preempt-idle\n", engine->name);
- complete_preempt_context(execlists);
- continue;
- }
+ if (head != execlists->csb_head) {
+ execlists->csb_head = head;
+ writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+ i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ }
- if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT))
- continue;
+ if (unlikely(fw))
+ intel_uncore_forcewake_put(i915, execlists->fw_domains);
+}
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
-
- rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
- engine->name,
- port->context_id, count,
- rq ? rq->global_seqno : 0,
- rq ? rq_prio(rq) : 0);
-
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
- GEM_BUG_ON(count == 0);
- if (--count == 0) {
- GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!i915_request_completed(rq));
- execlists_context_schedule_out(rq);
- trace_i915_request_out(rq);
- i915_request_put(rq);
-
- GEM_TRACE("%s completed ctx=%d\n",
- engine->name, port->context_id);
-
- execlists_port_complete(execlists, port);
- } else {
- port_set(port, port_pack(rq, count));
- }
+/*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- /* After the final element, the hw should be idle */
- GEM_BUG_ON(port_count(port) == 0 &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
- if (port_count(port) == 0)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_USER);
- }
+ /*
+ * We can skip acquiring intel_runtime_pm_get() here as it was taken
+ * on our behalf by the request (see i915_gem_mark_busy()) and it will
+ * not be relinquished until the device is idle (see
+ * i915_gem_idle_work_handler()). As a precaution, we make sure
+ * that all ELSP are drained i.e. we have processed the CSB,
+ * before allowing ourselves to idle and calling intel_runtime_pm_put().
+ */
+ GEM_BUG_ON(!engine->i915->gt.awake);
- if (head != execlists->csb_head) {
- execlists->csb_head = head;
- writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
- dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
- }
- }
+ /*
+ * Prefer doing test_and_clear_bit() as a two stage operation to avoid
+ * imposing the cost of a locked atomic transaction when submitting a
+ * new request (outside of the context-switch interrupt).
+ */
+ if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+ process_csb(engine);
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
+ if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
-
- if (fw)
- intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
static void queue_request(struct intel_engine_cs *engine,
@@ -1667,6 +1673,7 @@ static struct i915_request *
execlists_reset_prepare(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *request, *active;
/*
* Prevent request submission to the hardware until we have
@@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
tasklet_kill(&execlists->tasklet);
tasklet_disable(&execlists->tasklet);
- return i915_gem_find_active_request(engine);
+ /*
+ * We want to flush the pending context switches, having disabled
+ * the tasklet above, we can assume exclusive access to the execlists.
+ * For this allows us to catch up with an inflight preemption event,
+ * and avoid blaming an innocent request if the stall was due to the
+ * preemption itself.
+ */
+ process_csb(engine);
+
+ /*
+ * The last active request can then be no later than the last request
+ * now in ELSP[0]. So search backwards from there, so that is the GPU
+ * has advanced beyond the last CSB update, it will be pardoned.
+ */
+ active = NULL;
+ request = port_request(execlists->port);
+ if (request) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+ list_for_each_entry_from_reverse(request,
+ &engine->timeline->requests,
+ link) {
+ if (__i915_request_completed(request,
+ request->global_seqno))
+ break;
+
+ active = request;
+ }
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
+ }
+
+ return active;
}
static void reset_irq(struct intel_engine_cs *engine)
--
2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
` (3 preceding siblings ...)
2018-03-20 0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
@ 2018-03-20 0:39 ` Michel Thierry
2018-03-20 0:44 ` Chris Wilson
2018-03-20 1:24 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] " Patchwork
` (3 subsequent siblings)
8 siblings, 1 reply; 19+ messages in thread
From: Michel Thierry @ 2018-03-20 0:39 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala
On 3/19/2018 5:18 PM, Chris Wilson wrote:
> Not all callers want the GPU error to handled in the same way, so expose
> a control parameter. In the first instance, some callers do not want the
> heavyweight error capture so add a bit to request the state to be
> captured and saved.
>
> v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
> reason for the reset in the dev_notice(), superseding the earlier option
> to not print that notice.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 4 +--
> drivers/gpu/drm/i915/i915_drv.c | 17 +++++------
> drivers/gpu/drm/i915/i915_drv.h | 10 +++---
> drivers/gpu/drm/i915/i915_irq.c | 39 +++++++++++++-----------
> drivers/gpu/drm/i915/intel_hangcheck.c | 13 ++++----
> drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
> 6 files changed, 48 insertions(+), 48 deletions(-)
>
...
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index 42e45ae87393..fd0ffb8328d0 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> */
> tmp = I915_READ_CTL(engine);
> if (tmp & RING_WAIT) {
> - i915_handle_error(dev_priv, 0,
> - "Kicking stuck wait on %s",
> - engine->name);
> + i915_handle_error(dev_priv, BIT(engine->id), 0,
> + "stuck wait on %s", engine->name);
Before this change we were not resetting anything here — is that on
purpose? (If it is, it's worth mentioning in the commit msg, since it's
changing behavior.)
> I915_WRITE_CTL(engine, tmp);
> return ENGINE_WAIT_KICK;
> } > @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64
acthd)
> default:
> return ENGINE_DEAD;
> case 1:
> - i915_handle_error(dev_priv, 0,
> - "Kicking stuck semaphore on %s",
> + i915_handle_error(dev_priv, ALL_ENGINES, 0,
Same here,
> + "stuck semaphore on %s",
> engine->name);
> I915_WRITE_CTL(engine, tmp);
> return ENGINE_WAIT_KICK;
Everything else looks OK to me.
-Michel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
@ 2018-03-20 0:44 ` Chris Wilson
2018-03-20 0:56 ` Michel Thierry
0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2018-03-20 0:44 UTC (permalink / raw)
To: Michel Thierry, intel-gfx; +Cc: Mika Kuoppala
Quoting Michel Thierry (2018-03-20 00:39:35)
> On 3/19/2018 5:18 PM, Chris Wilson wrote:
> > Not all callers want the GPU error to handled in the same way, so expose
> > a control parameter. In the first instance, some callers do not want the
> > heavyweight error capture so add a bit to request the state to be
> > captured and saved.
> >
> > v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
> > reason for the reset in the dev_notice(), superseding the earlier option
> > to not print that notice.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Jeff McGee <jeff.mcgee@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Cc: Michel Thierry <michel.thierry@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_debugfs.c | 4 +--
> > drivers/gpu/drm/i915/i915_drv.c | 17 +++++------
> > drivers/gpu/drm/i915/i915_drv.h | 10 +++---
> > drivers/gpu/drm/i915/i915_irq.c | 39 +++++++++++++-----------
> > drivers/gpu/drm/i915/intel_hangcheck.c | 13 ++++----
> > drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
> > 6 files changed, 48 insertions(+), 48 deletions(-)
> >
> ...
> > diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> > index 42e45ae87393..fd0ffb8328d0 100644
> > --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> > +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> > @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> > */
> > tmp = I915_READ_CTL(engine);
> > if (tmp & RING_WAIT) {
> > - i915_handle_error(dev_priv, 0,
> > - "Kicking stuck wait on %s",
> > - engine->name);
> > + i915_handle_error(dev_priv, BIT(engine->id), 0,
> > + "stuck wait on %s", engine->name);
> Before we were not resetting anything here, is this change on purpose?
> (if it is, it's worth adding it to the commit msg since it's changing
> behavior).
>
> > I915_WRITE_CTL(engine, tmp);
> > return ENGINE_WAIT_KICK;
> > } > @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64
> acthd)
> > default:
> > return ENGINE_DEAD;
> > case 1:
> > - i915_handle_error(dev_priv, 0,
> > - "Kicking stuck semaphore on %s",
> > + i915_handle_error(dev_priv, ALL_ENGINES, 0,
> Same here,
Both are functionally no-op changes, as they are only for !per-engine
platforms (unless someone manages to send just the wrong type of garbage
to the GPU). I just thought it interesting to document that wait-event
needs a local kick and the wait-sema needs to kick the other engines.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:44 ` Chris Wilson
@ 2018-03-20 0:56 ` Michel Thierry
2018-03-20 1:09 ` Chris Wilson
0 siblings, 1 reply; 19+ messages in thread
From: Michel Thierry @ 2018-03-20 0:56 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala
On 3/19/2018 5:44 PM, Chris Wilson wrote:
> Quoting Michel Thierry (2018-03-20 00:39:35)
>> On 3/19/2018 5:18 PM, Chris Wilson wrote:
>>> Not all callers want the GPU error to handled in the same way, so expose
>>> a control parameter. In the first instance, some callers do not want the
>>> heavyweight error capture so add a bit to request the state to be
>>> captured and saved.
>>>
>>> v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
>>> reason for the reset in the dev_notice(), superseding the earlier option
>>> to not print that notice.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Jeff McGee <jeff.mcgee@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> Cc: Michel Thierry <michel.thierry@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_debugfs.c | 4 +--
>>> drivers/gpu/drm/i915/i915_drv.c | 17 +++++------
>>> drivers/gpu/drm/i915/i915_drv.h | 10 +++---
>>> drivers/gpu/drm/i915/i915_irq.c | 39 +++++++++++++-----------
>>> drivers/gpu/drm/i915/intel_hangcheck.c | 13 ++++----
>>> drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
>>> 6 files changed, 48 insertions(+), 48 deletions(-)
>>>
>> ...
>>> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
>>> index 42e45ae87393..fd0ffb8328d0 100644
>>> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
>>> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
>>> @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>>> */
>>> tmp = I915_READ_CTL(engine);
>>> if (tmp & RING_WAIT) {
>>> - i915_handle_error(dev_priv, 0,
>>> - "Kicking stuck wait on %s",
>>> - engine->name);
>>> + i915_handle_error(dev_priv, BIT(engine->id), 0,
>>> + "stuck wait on %s", engine->name);
>> Before we were not resetting anything here, is this change on purpose?
>> (if it is, it's worth adding it to the commit msg since it's changing
>> behavior).
>>
>>> I915_WRITE_CTL(engine, tmp);
>>> return ENGINE_WAIT_KICK;
>>> } > @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64
>> acthd)
>>> default:
>>> return ENGINE_DEAD;
>>> case 1:
>>> - i915_handle_error(dev_priv, 0,
>>> - "Kicking stuck semaphore on %s",
>>> + i915_handle_error(dev_priv, ALL_ENGINES, 0,
>> Same here,
>
> Both are functionally no-op changes, as they are only for !per-engine
> platforms (unless someone manages to send just the wrong type of garbage
> to the GPU). I just thought it interesting to document that wait-event
> needs a local kick and the wait-sema needs to kick the other engines.
i915_handle_error has this before full reset:
if (!engine_mask)
goto out;
No reset at all was happening before.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:56 ` Michel Thierry
@ 2018-03-20 1:09 ` Chris Wilson
0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-03-20 1:09 UTC (permalink / raw)
To: Michel Thierry, intel-gfx; +Cc: Mika Kuoppala
Quoting Michel Thierry (2018-03-20 00:56:04)
> On 3/19/2018 5:44 PM, Chris Wilson wrote:
> > Quoting Michel Thierry (2018-03-20 00:39:35)
> >> On 3/19/2018 5:18 PM, Chris Wilson wrote:
> >>> Not all callers want the GPU error to handled in the same way, so expose
> >>> a control parameter. In the first instance, some callers do not want the
> >>> heavyweight error capture so add a bit to request the state to be
> >>> captured and saved.
> >>>
> >>> v2: Pass msg down to i915_reset/i915_reset_engine so that we include the
> >>> reason for the reset in the dev_notice(), superseding the earlier option
> >>> to not print that notice.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Jeff McGee <jeff.mcgee@intel.com>
> >>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >>> Cc: Michel Thierry <michel.thierry@intel.com>
> >>> ---
> >>> drivers/gpu/drm/i915/i915_debugfs.c | 4 +--
> >>> drivers/gpu/drm/i915/i915_drv.c | 17 +++++------
> >>> drivers/gpu/drm/i915/i915_drv.h | 10 +++---
> >>> drivers/gpu/drm/i915/i915_irq.c | 39 +++++++++++++-----------
> >>> drivers/gpu/drm/i915/intel_hangcheck.c | 13 ++++----
> >>> drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++-----
> >>> 6 files changed, 48 insertions(+), 48 deletions(-)
> >>>
> >> ...
> >>> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> >>> index 42e45ae87393..fd0ffb8328d0 100644
> >>> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> >>> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> >>> @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> >>> */
> >>> tmp = I915_READ_CTL(engine);
> >>> if (tmp & RING_WAIT) {
> >>> - i915_handle_error(dev_priv, 0,
> >>> - "Kicking stuck wait on %s",
> >>> - engine->name);
> >>> + i915_handle_error(dev_priv, BIT(engine->id), 0,
> >>> + "stuck wait on %s", engine->name);
> >> Before we were not resetting anything here, is this change on purpose?
> >> (if it is, it's worth adding it to the commit msg since it's changing
> >> behavior).
> >>
> >>> I915_WRITE_CTL(engine, tmp);
> >>> return ENGINE_WAIT_KICK;
> >>> } > @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64
> >> acthd)
> >>> default:
> >>> return ENGINE_DEAD;
> >>> case 1:
> >>> - i915_handle_error(dev_priv, 0,
> >>> - "Kicking stuck semaphore on %s",
> >>> + i915_handle_error(dev_priv, ALL_ENGINES, 0,
> >> Same here,
> >
> > Both are functionally no-op changes, as they are only for !per-engine
> > platforms (unless someone manages to send just the wrong type of garbage
> > to the GPU). I just thought it interesting to document that wait-event
> > needs a local kick and the wait-sema needs to kick the other engines.
> i915_handle_error has this before full reset:
>
> if (!engine_mask)
> goto out;
>
> No reset at all was happening before.
We bugged out a while back then ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
` (4 preceding siblings ...)
2018-03-20 0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
@ 2018-03-20 1:24 ` Patchwork
2018-03-20 1:25 ` ✗ Fi.CI.SPARSE: " Patchwork
` (2 subsequent siblings)
8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20 1:24 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL : https://patchwork.freedesktop.org/series/40240/
State : warning
== Summary ==
$ dim checkpatch origin/drm-tip
b279af513a73 drm/i915: Add control flags to i915_handle_error()
-:111: CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#111: FILE: drivers/gpu/drm/i915/i915_drv.h:2703:
+extern void i915_reset(struct drm_i915_private *i915, const char *msg);
-:112: CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#112: FILE: drivers/gpu/drm/i915/i915_drv.h:2704:
+extern int i915_reset_engine(struct intel_engine_cs *engine, const char *msg);
total: 0 errors, 0 warnings, 2 checks, 273 lines checked
76088d3cc0fb drm/i915/execlists: Refactor out complete_preempt_context()
fe83f1c114bf drm/i915: Move engine reset prepare/finish to backends
597e8d4ae7a5 drm/i915: Split execlists/guc reset prepartions
2950e96e849d drm/i915/execlists: Flush pending preemption events during reset
-:103: WARNING:LONG_LINE: line over 100 characters
#103: FILE: drivers/gpu/drm/i915/intel_lrc.c:879:
+ head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-:104: WARNING:LONG_LINE: line over 100 characters
#104: FILE: drivers/gpu/drm/i915/intel_lrc.c:880:
+ tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
-:159: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#159: FILE: drivers/gpu/drm/i915/intel_lrc.c:911:
+ status, buf[2*head + 1],
^
-:188: CHECK:SPACING: spaces preferred around that '*' (ctx:VxV)
#188: FILE: drivers/gpu/drm/i915/intel_lrc.c:929:
+ buf[2*head + 1] == execlists->preempt_complete_status) {
^
total: 0 errors, 2 warnings, 2 checks, 396 lines checked
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
` (5 preceding siblings ...)
2018-03-20 1:24 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] " Patchwork
@ 2018-03-20 1:25 ` Patchwork
2018-03-20 1:41 ` ✓ Fi.CI.BAT: success " Patchwork
2018-03-20 6:45 ` ✗ Fi.CI.IGT: failure " Patchwork
8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20 1:25 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL : https://patchwork.freedesktop.org/series/40240/
State : warning
== Summary ==
$ dim sparse origin/drm-tip
Commit: drm/i915: Add control flags to i915_handle_error()
+drivers/gpu/drm/i915/i915_request.c:1232:35: warning: Using plain integer as NULL pointer
Commit: drm/i915/execlists: Refactor out complete_preempt_context()
Okay!
Commit: drm/i915: Move engine reset prepare/finish to backends
Okay!
Commit: drm/i915: Split execlists/guc reset prepartions
Okay!
Commit: drm/i915/execlists: Flush pending preemption events during reset
Okay!
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* ✓ Fi.CI.BAT: success for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
` (6 preceding siblings ...)
2018-03-20 1:25 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-03-20 1:41 ` Patchwork
2018-03-20 6:45 ` ✗ Fi.CI.IGT: failure " Patchwork
8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20 1:41 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL : https://patchwork.freedesktop.org/series/40240/
State : success
== Summary ==
Series 40240v1 series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
https://patchwork.freedesktop.org/api/1.0/series/40240/revisions/1/mbox/
---- Known issues:
Test kms_chamelium:
Subgroup dp-crc-fast:
pass -> DMESG-FAIL (fi-kbl-7500u) fdo#103841
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
incomplete -> PASS (fi-snb-2520m) fdo#103713
fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fi-bdw-5557u total:285 pass:264 dwarn:0 dfail:0 fail:0 skip:21 time:432s
fi-bdw-gvtdvm total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:445s
fi-blb-e6850 total:285 pass:220 dwarn:1 dfail:0 fail:0 skip:64 time:378s
fi-bsw-n3050 total:285 pass:239 dwarn:0 dfail:0 fail:0 skip:46 time:533s
fi-bwr-2160 total:285 pass:180 dwarn:0 dfail:0 fail:0 skip:105 time:296s
fi-bxt-j4205 total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:516s
fi-byt-j1900 total:285 pass:250 dwarn:0 dfail:0 fail:0 skip:35 time:516s
fi-byt-n2820 total:285 pass:246 dwarn:0 dfail:0 fail:0 skip:39 time:504s
fi-cfl-8700k total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:414s
fi-cfl-s2 total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:585s
fi-cfl-u total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:512s
fi-cnl-drrs total:285 pass:254 dwarn:3 dfail:0 fail:0 skip:28 time:523s
fi-elk-e7500 total:285 pass:225 dwarn:1 dfail:0 fail:0 skip:59 time:426s
fi-gdg-551 total:285 pass:176 dwarn:0 dfail:0 fail:1 skip:108 time:317s
fi-glk-1 total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:536s
fi-hsw-4770 total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:400s
fi-ilk-650 total:285 pass:225 dwarn:0 dfail:0 fail:0 skip:60 time:422s
fi-ivb-3520m total:285 pass:256 dwarn:0 dfail:0 fail:0 skip:29 time:473s
fi-ivb-3770 total:285 pass:252 dwarn:0 dfail:0 fail:0 skip:33 time:428s
fi-kbl-7500u total:285 pass:259 dwarn:1 dfail:1 fail:0 skip:24 time:478s
fi-kbl-7567u total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:467s
fi-kbl-r total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:514s
fi-pnv-d510 total:285 pass:219 dwarn:1 dfail:0 fail:0 skip:65 time:655s
fi-skl-6260u total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:439s
fi-skl-6600u total:285 pass:258 dwarn:0 dfail:0 fail:0 skip:27 time:530s
fi-skl-6700hq total:285 pass:259 dwarn:0 dfail:0 fail:0 skip:26 time:542s
fi-skl-6700k2 total:285 pass:261 dwarn:0 dfail:0 fail:0 skip:24 time:504s
fi-skl-6770hq total:285 pass:265 dwarn:0 dfail:0 fail:0 skip:20 time:487s
fi-skl-guc total:285 pass:257 dwarn:0 dfail:0 fail:0 skip:28 time:427s
fi-skl-gvtdvm total:285 pass:262 dwarn:0 dfail:0 fail:0 skip:23 time:447s
fi-snb-2520m total:285 pass:245 dwarn:0 dfail:0 fail:0 skip:40 time:568s
fi-snb-2600 total:285 pass:245 dwarn:0 dfail:0 fail:0 skip:40 time:397s
141def2a45f4a3ad7c7e9144cd26e97bb1298397 drm-tip: 2018y-03m-19d-23h-48m-43s UTC integration manifest
2950e96e849d drm/i915/execlists: Flush pending preemption events during reset
597e8d4ae7a5 drm/i915: Split execlists/guc reset preparations
fe83f1c114bf drm/i915: Move engine reset prepare/finish to backends
76088d3cc0fb drm/i915/execlists: Refactor out complete_preempt_context()
b279af513a73 drm/i915: Add control flags to i915_handle_error()
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8406/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* ✗ Fi.CI.IGT: failure for series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
` (7 preceding siblings ...)
2018-03-20 1:41 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-03-20 6:45 ` Patchwork
8 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2018-03-20 6:45 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/5] drm/i915: Add control flags to i915_handle_error()
URL : https://patchwork.freedesktop.org/series/40240/
State : failure
== Summary ==
---- Possible new issues:
Test drm_mm:
Subgroup sanitycheck:
pass -> INCOMPLETE (shard-apl)
Test drv_selftest:
Subgroup live_guc:
pass -> SKIP (shard-apl)
Subgroup live_hangcheck:
pass -> DMESG-FAIL (shard-apl)
---- Known issues:
Test kms_cursor_legacy:
Subgroup 2x-long-flip-vs-cursor-legacy:
incomplete -> PASS (shard-hsw) fdo#104873
Test kms_flip:
Subgroup dpms-vs-vblank-race:
fail -> PASS (shard-hsw) fdo#103060
Subgroup flip-vs-absolute-wf_vblank:
fail -> PASS (shard-hsw) fdo#100368
Subgroup flip-vs-expired-vblank-interruptible:
pass -> FAIL (shard-hsw) fdo#102887
Test kms_flip_tiling:
Subgroup flip-x-tiled:
fail -> PASS (shard-apl) fdo#103822
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-a:
skip -> PASS (shard-snb) fdo#103375 +1
Test kms_plane:
Subgroup plane-position-hole-pipe-c-planes:
fail -> PASS (shard-apl) fdo#103166
fdo#104873 https://bugs.freedesktop.org/show_bug.cgi?id=104873
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
shard-apl total:3462 pass:1795 dwarn:1 dfail:1 fail:7 skip:1656 time:12458s
shard-hsw total:3478 pass:1767 dwarn:1 dfail:0 fail:2 skip:1707 time:11739s
shard-snb total:3478 pass:1357 dwarn:1 dfail:0 fail:2 skip:2118 time:7153s
Blacklisted hosts:
shard-kbl total:3478 pass:1937 dwarn:1 dfail:1 fail:9 skip:1530 time:9861s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8406/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
2018-03-20 0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
@ 2018-03-21 0:17 ` Jeff McGee
2018-03-22 15:14 ` Jeff McGee
0 siblings, 1 reply; 19+ messages in thread
From: Jeff McGee @ 2018-03-21 0:17 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote:
> Catch up with the inflight CSB events, after disabling the tasklet
> before deciding which request was truly guilty of hanging the GPU.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> CC: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
> drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
> 1 file changed, 197 insertions(+), 158 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e5a3ffbc273a..beb81f13a3cc 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> local_irq_restore(flags);
> }
>
> -/*
> - * Check the unread Context Status Buffers and manage the submission of new
> - * contexts to the ELSP accordingly.
> - */
> -static void execlists_submission_tasklet(unsigned long data)
> +static void process_csb(struct intel_engine_cs *engine)
> {
> - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct execlist_port * const port = execlists->port;
> - struct drm_i915_private *dev_priv = engine->i915;
> + struct drm_i915_private *i915 = engine->i915;
> + unsigned int head, tail;
> + const u32 *buf;
> bool fw = false;
>
> - /* We can skip acquiring intel_runtime_pm_get() here as it was taken
> - * on our behalf by the request (see i915_gem_mark_busy()) and it will
> - * not be relinquished until the device is idle (see
> - * i915_gem_idle_work_handler()). As a precaution, we make sure
> - * that all ELSP are drained i.e. we have processed the CSB,
> - * before allowing ourselves to idle and calling intel_runtime_pm_put().
> - */
> - GEM_BUG_ON(!dev_priv->gt.awake);
> + if (unlikely(execlists->csb_use_mmio)) {
> + buf = (u32 * __force)
> + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> + execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> + } else {
> + /* The HWSP contains a (cacheable) mirror of the CSB */
> + buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> + }
>
> - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
> - * imposing the cost of a locked atomic transaction when submitting a
> - * new request (outside of the context-switch interrupt).
> + /*
> + * The write will be ordered by the uncached read (itself
> + * a memory barrier), so we do not need another in the form
> + * of a locked instruction. The race between the interrupt
> + * handler and the split test/clear is harmless as we order
> + * our clear before the CSB read. If the interrupt arrived
> + * first between the test and the clear, we read the updated
> + * CSB and clear the bit. If the interrupt arrives as we read
> + * the CSB or later (i.e. after we had cleared the bit) the bit
> + * is set and we do a new loop.
> */
> - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> - /* The HWSP contains a (cacheable) mirror of the CSB */
> - const u32 *buf =
> - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> - unsigned int head, tail;
> -
> - if (unlikely(execlists->csb_use_mmio)) {
> - buf = (u32 * __force)
> - (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> - execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> - }
> + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> + if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> + intel_uncore_forcewake_get(i915, execlists->fw_domains);
> + fw = true;
> +
> + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + tail = GEN8_CSB_WRITE_PTR(head);
> + head = GEN8_CSB_READ_PTR(head);
> + execlists->csb_head = head;
> + } else {
> + const int write_idx =
> + intel_hws_csb_write_index(i915) -
> + I915_HWS_CSB_BUF0_INDEX;
> +
> + head = execlists->csb_head;
> + tail = READ_ONCE(buf[write_idx]);
> + }
> + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> + engine->name,
> + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> +
> + while (head != tail) {
> + struct i915_request *rq;
> + unsigned int status;
> + unsigned int count;
> +
> + if (++head == GEN8_CSB_ENTRIES)
> + head = 0;
>
> - /* The write will be ordered by the uncached read (itself
> - * a memory barrier), so we do not need another in the form
> - * of a locked instruction. The race between the interrupt
> - * handler and the split test/clear is harmless as we order
> - * our clear before the CSB read. If the interrupt arrived
> - * first between the test and the clear, we read the updated
> - * CSB and clear the bit. If the interrupt arrives as we read
> - * the CSB or later (i.e. after we had cleared the bit) the bit
> - * is set and we do a new loop.
> + /*
> + * We are flying near dragons again.
> + *
> + * We hold a reference to the request in execlist_port[]
> + * but no more than that. We are operating in softirq
> + * context and so cannot hold any mutex or sleep. That
> + * prevents us stopping the requests we are processing
> + * in port[] from being retired simultaneously (the
> + * breadcrumb will be complete before we see the
> + * context-switch). As we only hold the reference to the
> + * request, any pointer chasing underneath the request
> + * is subject to a potential use-after-free. Thus we
> + * store all of the bookkeeping within port[] as
> + * required, and avoid using unguarded pointers beneath
> + * request itself. The same applies to the atomic
> + * status notifier.
> */
> - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> - if (!fw) {
> - intel_uncore_forcewake_get(dev_priv,
> - execlists->fw_domains);
> - fw = true;
> - }
>
> - head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> - tail = GEN8_CSB_WRITE_PTR(head);
> - head = GEN8_CSB_READ_PTR(head);
> - execlists->csb_head = head;
> - } else {
> - const int write_idx =
> - intel_hws_csb_write_index(dev_priv) -
> - I915_HWS_CSB_BUF0_INDEX;
> + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> + engine->name, head,
> + status, buf[2*head + 1],
> + execlists->active);
> +
> + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> + GEN8_CTX_STATUS_PREEMPTED))
> + execlists_set_active(execlists,
> + EXECLISTS_ACTIVE_HWACK);
> + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> + execlists_clear_active(execlists,
> + EXECLISTS_ACTIVE_HWACK);
> +
> + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> + continue;
>
> - head = execlists->csb_head;
> - tail = READ_ONCE(buf[write_idx]);
> - }
> - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> - engine->name,
> - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> + /* We should never get a COMPLETED | IDLE_ACTIVE! */
> + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>
> - while (head != tail) {
> - struct i915_request *rq;
> - unsigned int status;
> - unsigned int count;
> + if (status & GEN8_CTX_STATUS_COMPLETE &&
> + buf[2*head + 1] == execlists->preempt_complete_status) {
> + GEM_TRACE("%s preempt-idle\n", engine->name);
> + complete_preempt_context(execlists);
> + continue;
> + }
>
> - if (++head == GEN8_CSB_ENTRIES)
> - head = 0;
> + if (status & GEN8_CTX_STATUS_PREEMPTED &&
> + execlists_is_active(execlists,
> + EXECLISTS_ACTIVE_PREEMPT))
> + continue;
>
> - /* We are flying near dragons again.
> - *
> - * We hold a reference to the request in execlist_port[]
> - * but no more than that. We are operating in softirq
> - * context and so cannot hold any mutex or sleep. That
> - * prevents us stopping the requests we are processing
> - * in port[] from being retired simultaneously (the
> - * breadcrumb will be complete before we see the
> - * context-switch). As we only hold the reference to the
> - * request, any pointer chasing underneath the request
> - * is subject to a potential use-after-free. Thus we
> - * store all of the bookkeeping within port[] as
> - * required, and avoid using unguarded pointers beneath
> - * request itself. The same applies to the atomic
> - * status notifier.
> - */
> + GEM_BUG_ON(!execlists_is_active(execlists,
> + EXECLISTS_ACTIVE_USER));
>
> - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> - engine->name, head,
> - status, buf[2*head + 1],
> - execlists->active);
> -
> - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> - GEN8_CTX_STATUS_PREEMPTED))
> - execlists_set_active(execlists,
> - EXECLISTS_ACTIVE_HWACK);
> - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> - execlists_clear_active(execlists,
> - EXECLISTS_ACTIVE_HWACK);
> -
> - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> - continue;
> + rq = port_unpack(port, &count);
> + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> + engine->name,
> + port->context_id, count,
> + rq ? rq->global_seqno : 0,
> + rq ? rq_prio(rq) : 0);
> +
> + /* Check the context/desc id for this event matches */
> + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> +
> + GEM_BUG_ON(count == 0);
> + if (--count == 0) {
> + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> + GEM_BUG_ON(port_isset(&port[1]) &&
> + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> + GEM_BUG_ON(!i915_request_completed(rq));
> + execlists_context_schedule_out(rq);
> + trace_i915_request_out(rq);
> + i915_request_put(rq);
> +
> + GEM_TRACE("%s completed ctx=%d\n",
> + engine->name, port->context_id);
> +
> + execlists_port_complete(execlists, port);
> + } else {
> + port_set(port, port_pack(rq, count));
> + }
>
> - /* We should never get a COMPLETED | IDLE_ACTIVE! */
> - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> + /* After the final element, the hw should be idle */
> + GEM_BUG_ON(port_count(port) == 0 &&
> + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> + if (port_count(port) == 0)
> + execlists_clear_active(execlists,
> + EXECLISTS_ACTIVE_USER);
> + }
>
> - if (status & GEN8_CTX_STATUS_COMPLETE &&
> - buf[2*head + 1] == execlists->preempt_complete_status) {
> - GEM_TRACE("%s preempt-idle\n", engine->name);
> - complete_preempt_context(execlists);
> - continue;
> - }
> + if (head != execlists->csb_head) {
> + execlists->csb_head = head;
> + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + }
>
> - if (status & GEN8_CTX_STATUS_PREEMPTED &&
> - execlists_is_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT))
> - continue;
> + if (unlikely(fw))
> + intel_uncore_forcewake_put(i915, execlists->fw_domains);
> +}
>
> - GEM_BUG_ON(!execlists_is_active(execlists,
> - EXECLISTS_ACTIVE_USER));
> -
> - rq = port_unpack(port, &count);
> - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> - engine->name,
> - port->context_id, count,
> - rq ? rq->global_seqno : 0,
> - rq ? rq_prio(rq) : 0);
> -
> - /* Check the context/desc id for this event matches */
> - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> -
> - GEM_BUG_ON(count == 0);
> - if (--count == 0) {
> - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> - GEM_BUG_ON(port_isset(&port[1]) &&
> - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> - GEM_BUG_ON(!i915_request_completed(rq));
> - execlists_context_schedule_out(rq);
> - trace_i915_request_out(rq);
> - i915_request_put(rq);
> -
> - GEM_TRACE("%s completed ctx=%d\n",
> - engine->name, port->context_id);
> -
> - execlists_port_complete(execlists, port);
> - } else {
> - port_set(port, port_pack(rq, count));
> - }
> +/*
> + * Check the unread Context Status Buffers and manage the submission of new
> + * contexts to the ELSP accordingly.
> + */
> +static void execlists_submission_tasklet(unsigned long data)
> +{
> + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>
> - /* After the final element, the hw should be idle */
> - GEM_BUG_ON(port_count(port) == 0 &&
> - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> - if (port_count(port) == 0)
> - execlists_clear_active(execlists,
> - EXECLISTS_ACTIVE_USER);
> - }
> + /*
> + * We can skip acquiring intel_runtime_pm_get() here as it was taken
> + * on our behalf by the request (see i915_gem_mark_busy()) and it will
> + * not be relinquished until the device is idle (see
> + * i915_gem_idle_work_handler()). As a precaution, we make sure
> + * that all ELSP are drained i.e. we have processed the CSB,
> + * before allowing ourselves to idle and calling intel_runtime_pm_put().
> + */
> + GEM_BUG_ON(!engine->i915->gt.awake);
>
> - if (head != execlists->csb_head) {
> - execlists->csb_head = head;
> - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> - }
> - }
> + /*
> + * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> + * imposing the cost of a locked atomic transaction when submitting a
> + * new request (outside of the context-switch interrupt).
> + */
> + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
This was a 'while' loop before the refactoring. Shouldn't it still be a loop, in order to
catch new irq_posted events that arrive during processing?
BTW, I have revised and rebased the force preemption patches on drm-tip with
this and the other related patches you have proposed. I just started running
my IGT test that covers the innocent context on port[1] scenario that we
discussed on IRC. Getting a sporadic failure but need more time to root cause.
Will update soon.
> + process_csb(engine);
>
> - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> + if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> execlists_dequeue(engine);
> -
> - if (fw)
> - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> }
>
> static void queue_request(struct intel_engine_cs *engine,
> @@ -1667,6 +1673,7 @@ static struct i915_request *
> execlists_reset_prepare(struct intel_engine_cs *engine)
> {
> struct intel_engine_execlists * const execlists = &engine->execlists;
> + struct i915_request *request, *active;
>
> /*
> * Prevent request submission to the hardware until we have
> @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> tasklet_kill(&execlists->tasklet);
> tasklet_disable(&execlists->tasklet);
>
> - return i915_gem_find_active_request(engine);
> + /*
> + * We want to flush the pending context switches, having disabled
> + * the tasklet above, we can assume exclusive access to the execlists.
> + * For this allows us to catch up with an inflight preemption event,
> + * and avoid blaming an innocent request if the stall was due to the
> + * preemption itself.
> + */
> + process_csb(engine);
> +
> + /*
> + * The last active request can then be no later than the last request
> + * now in ELSP[0]. So search backwards from there, so that if the GPU
> + * has advanced beyond the last CSB update, it will be pardoned.
> + */
> + active = NULL;
> + request = port_request(execlists->port);
> + if (request) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&engine->timeline->lock, flags);
> + list_for_each_entry_from_reverse(request,
> + &engine->timeline->requests,
> + link) {
> + if (__i915_request_completed(request,
> + request->global_seqno))
> + break;
> +
> + active = request;
> + }
> + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> + }
> +
> + return active;
> }
>
> static void reset_irq(struct intel_engine_cs *engine)
> --
> 2.16.2
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
2018-03-21 0:17 ` Jeff McGee
@ 2018-03-22 15:14 ` Jeff McGee
2018-03-26 11:28 ` Chris Wilson
0 siblings, 1 reply; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:14 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, Mar 20, 2018 at 05:17:34PM -0700, Jeff McGee wrote:
> On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote:
> > Catch up with the inflight CSB events, after disabling the tasklet
> > before deciding which request was truly guilty of hanging the GPU.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > CC: Michel Thierry <michel.thierry@intel.com>
> > Cc: Jeff McGee <jeff.mcgee@intel.com>
> > ---
> > drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
> > 1 file changed, 197 insertions(+), 158 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index e5a3ffbc273a..beb81f13a3cc 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> > local_irq_restore(flags);
> > }
> >
> > -/*
> > - * Check the unread Context Status Buffers and manage the submission of new
> > - * contexts to the ELSP accordingly.
> > - */
> > -static void execlists_submission_tasklet(unsigned long data)
> > +static void process_csb(struct intel_engine_cs *engine)
> > {
> > - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> > struct intel_engine_execlists * const execlists = &engine->execlists;
> > struct execlist_port * const port = execlists->port;
> > - struct drm_i915_private *dev_priv = engine->i915;
> > + struct drm_i915_private *i915 = engine->i915;
> > + unsigned int head, tail;
> > + const u32 *buf;
> > bool fw = false;
> >
> > - /* We can skip acquiring intel_runtime_pm_get() here as it was taken
> > - * on our behalf by the request (see i915_gem_mark_busy()) and it will
> > - * not be relinquished until the device is idle (see
> > - * i915_gem_idle_work_handler()). As a precaution, we make sure
> > - * that all ELSP are drained i.e. we have processed the CSB,
> > - * before allowing ourselves to idle and calling intel_runtime_pm_put().
> > - */
> > - GEM_BUG_ON(!dev_priv->gt.awake);
> > + if (unlikely(execlists->csb_use_mmio)) {
> > + buf = (u32 * __force)
> > + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > + execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> > + } else {
> > + /* The HWSP contains a (cacheable) mirror of the CSB */
> > + buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > + }
> >
> > - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
> > - * imposing the cost of a locked atomic transaction when submitting a
> > - * new request (outside of the context-switch interrupt).
> > + /*
> > + * The write will be ordered by the uncached read (itself
> > + * a memory barrier), so we do not need another in the form
> > + * of a locked instruction. The race between the interrupt
> > + * handler and the split test/clear is harmless as we order
> > + * our clear before the CSB read. If the interrupt arrived
> > + * first between the test and the clear, we read the updated
> > + * CSB and clear the bit. If the interrupt arrives as we read
> > + * the CSB or later (i.e. after we had cleared the bit) the bit
> > + * is set and we do a new loop.
> > */
> > - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> > - /* The HWSP contains a (cacheable) mirror of the CSB */
> > - const u32 *buf =
> > - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > - unsigned int head, tail;
> > -
> > - if (unlikely(execlists->csb_use_mmio)) {
> > - buf = (u32 * __force)
> > - (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > - execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> > - }
> > + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > + if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> > + intel_uncore_forcewake_get(i915, execlists->fw_domains);
> > + fw = true;
> > +
> > + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > + tail = GEN8_CSB_WRITE_PTR(head);
> > + head = GEN8_CSB_READ_PTR(head);
> > + execlists->csb_head = head;
> > + } else {
> > + const int write_idx =
> > + intel_hws_csb_write_index(i915) -
> > + I915_HWS_CSB_BUF0_INDEX;
> > +
> > + head = execlists->csb_head;
> > + tail = READ_ONCE(buf[write_idx]);
> > + }
> > + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > + engine->name,
> > + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > +
> > + while (head != tail) {
> > + struct i915_request *rq;
> > + unsigned int status;
> > + unsigned int count;
> > +
> > + if (++head == GEN8_CSB_ENTRIES)
> > + head = 0;
> >
> > - /* The write will be ordered by the uncached read (itself
> > - * a memory barrier), so we do not need another in the form
> > - * of a locked instruction. The race between the interrupt
> > - * handler and the split test/clear is harmless as we order
> > - * our clear before the CSB read. If the interrupt arrived
> > - * first between the test and the clear, we read the updated
> > - * CSB and clear the bit. If the interrupt arrives as we read
> > - * the CSB or later (i.e. after we had cleared the bit) the bit
> > - * is set and we do a new loop.
> > + /*
> > + * We are flying near dragons again.
> > + *
> > + * We hold a reference to the request in execlist_port[]
> > + * but no more than that. We are operating in softirq
> > + * context and so cannot hold any mutex or sleep. That
> > + * prevents us stopping the requests we are processing
> > + * in port[] from being retired simultaneously (the
> > + * breadcrumb will be complete before we see the
> > + * context-switch). As we only hold the reference to the
> > + * request, any pointer chasing underneath the request
> > + * is subject to a potential use-after-free. Thus we
> > + * store all of the bookkeeping within port[] as
> > + * required, and avoid using unguarded pointers beneath
> > + * request itself. The same applies to the atomic
> > + * status notifier.
> > */
> > - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > - if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> > - if (!fw) {
> > - intel_uncore_forcewake_get(dev_priv,
> > - execlists->fw_domains);
> > - fw = true;
> > - }
> >
> > - head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > - tail = GEN8_CSB_WRITE_PTR(head);
> > - head = GEN8_CSB_READ_PTR(head);
> > - execlists->csb_head = head;
> > - } else {
> > - const int write_idx =
> > - intel_hws_csb_write_index(dev_priv) -
> > - I915_HWS_CSB_BUF0_INDEX;
> > + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> > + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> > + engine->name, head,
> > + status, buf[2*head + 1],
> > + execlists->active);
> > +
> > + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > + GEN8_CTX_STATUS_PREEMPTED))
> > + execlists_set_active(execlists,
> > + EXECLISTS_ACTIVE_HWACK);
> > + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > + execlists_clear_active(execlists,
> > + EXECLISTS_ACTIVE_HWACK);
> > +
> > + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > + continue;
> >
> > - head = execlists->csb_head;
> > - tail = READ_ONCE(buf[write_idx]);
> > - }
> > - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > - engine->name,
> > - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > + /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> >
> > - while (head != tail) {
> > - struct i915_request *rq;
> > - unsigned int status;
> > - unsigned int count;
> > + if (status & GEN8_CTX_STATUS_COMPLETE &&
> > + buf[2*head + 1] == execlists->preempt_complete_status) {
> > + GEM_TRACE("%s preempt-idle\n", engine->name);
> > + complete_preempt_context(execlists);
> > + continue;
> > + }
> >
> > - if (++head == GEN8_CSB_ENTRIES)
> > - head = 0;
> > + if (status & GEN8_CTX_STATUS_PREEMPTED &&
> > + execlists_is_active(execlists,
> > + EXECLISTS_ACTIVE_PREEMPT))
> > + continue;
> >
> > - /* We are flying near dragons again.
> > - *
> > - * We hold a reference to the request in execlist_port[]
> > - * but no more than that. We are operating in softirq
> > - * context and so cannot hold any mutex or sleep. That
> > - * prevents us stopping the requests we are processing
> > - * in port[] from being retired simultaneously (the
> > - * breadcrumb will be complete before we see the
> > - * context-switch). As we only hold the reference to the
> > - * request, any pointer chasing underneath the request
> > - * is subject to a potential use-after-free. Thus we
> > - * store all of the bookkeeping within port[] as
> > - * required, and avoid using unguarded pointers beneath
> > - * request itself. The same applies to the atomic
> > - * status notifier.
> > - */
> > + GEM_BUG_ON(!execlists_is_active(execlists,
> > + EXECLISTS_ACTIVE_USER));
> >
> > - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> > - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> > - engine->name, head,
> > - status, buf[2*head + 1],
> > - execlists->active);
> > -
> > - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > - GEN8_CTX_STATUS_PREEMPTED))
> > - execlists_set_active(execlists,
> > - EXECLISTS_ACTIVE_HWACK);
> > - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > - execlists_clear_active(execlists,
> > - EXECLISTS_ACTIVE_HWACK);
> > -
> > - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > - continue;
> > + rq = port_unpack(port, &count);
> > + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> > + engine->name,
> > + port->context_id, count,
> > + rq ? rq->global_seqno : 0,
> > + rq ? rq_prio(rq) : 0);
> > +
> > + /* Check the context/desc id for this event matches */
> > + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> > +
> > + GEM_BUG_ON(count == 0);
> > + if (--count == 0) {
> > + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> > + GEM_BUG_ON(port_isset(&port[1]) &&
> > + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> > + GEM_BUG_ON(!i915_request_completed(rq));
> > + execlists_context_schedule_out(rq);
> > + trace_i915_request_out(rq);
> > + i915_request_put(rq);
> > +
> > + GEM_TRACE("%s completed ctx=%d\n",
> > + engine->name, port->context_id);
> > +
> > + execlists_port_complete(execlists, port);
> > + } else {
> > + port_set(port, port_pack(rq, count));
> > + }
> >
> > - /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> > + /* After the final element, the hw should be idle */
> > + GEM_BUG_ON(port_count(port) == 0 &&
> > + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> > + if (port_count(port) == 0)
> > + execlists_clear_active(execlists,
> > + EXECLISTS_ACTIVE_USER);
> > + }
> >
> > - if (status & GEN8_CTX_STATUS_COMPLETE &&
> > - buf[2*head + 1] == execlists->preempt_complete_status) {
> > - GEM_TRACE("%s preempt-idle\n", engine->name);
> > - complete_preempt_context(execlists);
> > - continue;
> > - }
> > + if (head != execlists->csb_head) {
> > + execlists->csb_head = head;
> > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > + }
> >
> > - if (status & GEN8_CTX_STATUS_PREEMPTED &&
> > - execlists_is_active(execlists,
> > - EXECLISTS_ACTIVE_PREEMPT))
> > - continue;
> > + if (unlikely(fw))
> > + intel_uncore_forcewake_put(i915, execlists->fw_domains);
> > +}
> >
> > - GEM_BUG_ON(!execlists_is_active(execlists,
> > - EXECLISTS_ACTIVE_USER));
> > -
> > - rq = port_unpack(port, &count);
> > - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> > - engine->name,
> > - port->context_id, count,
> > - rq ? rq->global_seqno : 0,
> > - rq ? rq_prio(rq) : 0);
> > -
> > - /* Check the context/desc id for this event matches */
> > - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> > -
> > - GEM_BUG_ON(count == 0);
> > - if (--count == 0) {
> > - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> > - GEM_BUG_ON(port_isset(&port[1]) &&
> > - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> > - GEM_BUG_ON(!i915_request_completed(rq));
> > - execlists_context_schedule_out(rq);
> > - trace_i915_request_out(rq);
> > - i915_request_put(rq);
> > -
> > - GEM_TRACE("%s completed ctx=%d\n",
> > - engine->name, port->context_id);
> > -
> > - execlists_port_complete(execlists, port);
> > - } else {
> > - port_set(port, port_pack(rq, count));
> > - }
> > +/*
> > + * Check the unread Context Status Buffers and manage the submission of new
> > + * contexts to the ELSP accordingly.
> > + */
> > +static void execlists_submission_tasklet(unsigned long data)
> > +{
> > + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> >
> > - /* After the final element, the hw should be idle */
> > - GEM_BUG_ON(port_count(port) == 0 &&
> > - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> > - if (port_count(port) == 0)
> > - execlists_clear_active(execlists,
> > - EXECLISTS_ACTIVE_USER);
> > - }
> > + /*
> > + * We can skip acquiring intel_runtime_pm_get() here as it was taken
> > + * on our behalf by the request (see i915_gem_mark_busy()) and it will
> > + * not be relinquished until the device is idle (see
> > + * i915_gem_idle_work_handler()). As a precaution, we make sure
> > + * that all ELSP are drained i.e. we have processed the CSB,
> > + * before allowing ourselves to idle and calling intel_runtime_pm_put().
> > + */
> > + GEM_BUG_ON(!engine->i915->gt.awake);
> >
> > - if (head != execlists->csb_head) {
> > - execlists->csb_head = head;
> > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > - }
> > - }
> > + /*
> > + * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> > + * imposing the cost of a locked atomic transaction when submitting a
> > + * new request (outside of the context-switch interrupt).
> > + */
> > + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> This was a 'while' loop before the refactoring. Shouldn't it still be a
> 'while' in order to catch a new irq_posted that arrives during processing?
>
> BTW, I have revised and rebased the force preemption patches on drm-tip with
> this and the other related patches you have proposed. I just started running
> my IGT test that covers the innocent context on port[1] scenario that we
> discussed on IRC. Getting a sporadic failure but need more time to root cause.
> Will update soon.
>
> > + process_csb(engine);
> >
> > - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> > + if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> > execlists_dequeue(engine);
> > -
> > - if (fw)
> > - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> > }
> >
> > static void queue_request(struct intel_engine_cs *engine,
> > @@ -1667,6 +1673,7 @@ static struct i915_request *
> > execlists_reset_prepare(struct intel_engine_cs *engine)
> > {
> > struct intel_engine_execlists * const execlists = &engine->execlists;
> > + struct i915_request *request, *active;
> >
> > /*
> > * Prevent request submission to the hardware until we have
> > @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> > tasklet_kill(&execlists->tasklet);
> > tasklet_disable(&execlists->tasklet);
> >
> > - return i915_gem_find_active_request(engine);
> > + /*
> > + * We want to flush the pending context switches, having disabled
> > + * the tasklet above, we can assume exclusive access to the execlists.
> > + * For this allows us to catch up with an inflight preemption event,
> > + * and avoid blaming an innocent request if the stall was due to the
> > + * preemption itself.
> > + */
> > + process_csb(engine);
> > +
> > + /*
> > + * The last active request can then be no later than the last request
> > + * now in ELSP[0]. So search backwards from there, so that is the GPU
> > + * has advanced beyond the last CSB update, it will be pardoned.
> > + */
> > + active = NULL;
> > + request = port_request(execlists->port);
> > + if (request) {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&engine->timeline->lock, flags);
> > + list_for_each_entry_from_reverse(request,
> > + &engine->timeline->requests,
> > + link) {
> > + if (__i915_request_completed(request,
> > + request->global_seqno))
> > + break;
> > +
> > + active = request;
> > + }
> > + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> > + }
> > +
> > + return active;
Now we can see an abort of the reset if process_csb() processes a completed
preemption. In that case we need to kick the tasklet so that a dequeue of the
waiting request happens. Currently we only kick (if needed) from
gen8_init_common_ring, which is skipped when we abort the reset.
Otherwise this is working well in my force preemption tests.
-Jeff
> > }
> >
> > static void reset_irq(struct intel_engine_cs *engine)
> > --
> > 2.16.2
> >
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context()
2018-03-20 0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
@ 2018-03-22 15:26 ` Jeff McGee
0 siblings, 0 replies; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:26 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, Mar 20, 2018 at 12:18:45AM +0000, Chris Wilson wrote:
> As a complement to inject_preempt_context(), follow up with the function
> to handle its completion. This will be useful should we wish to extend
> the duties of the preempt-context for execlists.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> ---
> drivers/gpu/drm/i915/intel_lrc.c | 22 ++++++++++++----------
> 1 file changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 53f1c009ed7b..0bfaeb56b8c7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -531,8 +531,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> if (execlists->ctrl_reg)
> writel(EL_CTRL_LOAD, execlists->ctrl_reg);
>
> - execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
> - execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> + execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
> + execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> +}
> +
> +static void complete_preempt_context(struct intel_engine_execlists *execlists)
> +{
> + execlists_cancel_port_requests(execlists);
> + execlists_unwind_incomplete_requests(execlists);
> +
> + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
> + execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> }
>
> static void execlists_dequeue(struct intel_engine_cs *engine)
> @@ -939,14 +948,7 @@ static void execlists_submission_tasklet(unsigned long data)
> if (status & GEN8_CTX_STATUS_COMPLETE &&
> buf[2*head + 1] == execlists->preempt_complete_status) {
> GEM_TRACE("%s preempt-idle\n", engine->name);
> -
> - execlists_cancel_port_requests(execlists);
> - execlists_unwind_incomplete_requests(execlists);
> -
> - GEM_BUG_ON(!execlists_is_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT));
> - execlists_clear_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT);
> + complete_preempt_context(execlists);
> continue;
> }
>
> --
> 2.16.2
>
Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends
2018-03-20 0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
@ 2018-03-22 15:28 ` Jeff McGee
0 siblings, 0 replies; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:28 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, Mar 20, 2018 at 12:18:46AM +0000, Chris Wilson wrote:
> In preparation to more carefully handling incomplete preemption during
> reset by execlists, we move the existing code wholesale to the backends
> under a couple of new reset vfuncs.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> CC: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 42 ++++----------------------
> drivers/gpu/drm/i915/intel_lrc.c | 52 +++++++++++++++++++++++++++++++--
> drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++++++++++--
> drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++++--
> 4 files changed, 78 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 802df8e1a544..38f7160d99c9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2917,7 +2917,7 @@ static bool engine_stalled(struct intel_engine_cs *engine)
> struct i915_request *
> i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
> {
> - struct i915_request *request = NULL;
> + struct i915_request *request;
>
> /*
> * During the reset sequence, we must prevent the engine from
> @@ -2940,40 +2940,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
> */
> kthread_park(engine->breadcrumbs.signaler);
>
> - /*
> - * Prevent request submission to the hardware until we have
> - * completed the reset in i915_gem_reset_finish(). If a request
> - * is completed by one engine, it may then queue a request
> - * to a second via its execlists->tasklet *just* as we are
> - * calling engine->init_hw() and also writing the ELSP.
> - * Turning off the execlists->tasklet until the reset is over
> - * prevents the race.
> - *
> - * Note that this needs to be a single atomic operation on the
> - * tasklet (flush existing tasks, prevent new tasks) to prevent
> - * a race between reset and set-wedged. It is not, so we do the best
> - * we can atm and make sure we don't lock the machine up in the more
> - * common case of recursively being called from set-wedged from inside
> - * i915_reset.
> - */
> - if (!atomic_read(&engine->execlists.tasklet.count))
> - tasklet_kill(&engine->execlists.tasklet);
> - tasklet_disable(&engine->execlists.tasklet);
> -
> - /*
> - * We're using worker to queue preemption requests from the tasklet in
> - * GuC submission mode.
> - * Even though tasklet was disabled, we may still have a worker queued.
> - * Let's make sure that all workers scheduled before disabling the
> - * tasklet are completed before continuing with the reset.
> - */
> - if (engine->i915->guc.preempt_wq)
> - flush_workqueue(engine->i915->guc.preempt_wq);
> -
> - if (engine->irq_seqno_barrier)
> - engine->irq_seqno_barrier(engine);
> -
> - request = i915_gem_find_active_request(engine);
> + request = engine->reset.prepare(engine);
> if (request && request->fence.error == -EIO)
> request = ERR_PTR(-EIO); /* Previous reset failed! */
>
> @@ -3120,7 +3087,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
> }
>
> /* Setup the CS to resume from the breadcrumb of the hung request */
> - engine->reset_hw(engine, request);
> + engine->reset.reset(engine, request);
> }
>
> void i915_gem_reset(struct drm_i915_private *dev_priv)
> @@ -3172,7 +3139,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
>
> void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
> {
> - tasklet_enable(&engine->execlists.tasklet);
> + engine->reset.finish(engine);
> +
> kthread_unpark(engine->breadcrumbs.signaler);
>
> intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 0bfaeb56b8c7..f662a9524233 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1663,6 +1663,44 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
> return init_workarounds_ring(engine);
> }
>
> +static struct i915_request *
> +execlists_reset_prepare(struct intel_engine_cs *engine)
> +{
> + struct intel_engine_execlists * const execlists = &engine->execlists;
> +
> + /*
> + * Prevent request submission to the hardware until we have
> + * completed the reset in i915_gem_reset_finish(). If a request
> + * is completed by one engine, it may then queue a request
> + * to a second via its execlists->tasklet *just* as we are
> + * calling engine->init_hw() and also writing the ELSP.
> + * Turning off the execlists->tasklet until the reset is over
> + * prevents the race.
> + *
> + * Note that this needs to be a single atomic operation on the
> + * tasklet (flush existing tasks, prevent new tasks) to prevent
> + * a race between reset and set-wedged. It is not, so we do the best
> + * we can atm and make sure we don't lock the machine up in the more
> + * common case of recursively being called from set-wedged from inside
> + * i915_reset.
> + */
> + if (!atomic_read(&execlists->tasklet.count))
> + tasklet_kill(&execlists->tasklet);
> + tasklet_disable(&execlists->tasklet);
> +
> + /*
> + * We're using worker to queue preemption requests from the tasklet in
> + * GuC submission mode.
> + * Even though tasklet was disabled, we may still have a worker queued.
> + * Let's make sure that all workers scheduled before disabling the
> + * tasklet are completed before continuing with the reset.
> + */
> + if (engine->i915->guc.preempt_wq)
> + flush_workqueue(engine->i915->guc.preempt_wq);
> +
> + return i915_gem_find_active_request(engine);
> +}
> +
> static void reset_irq(struct intel_engine_cs *engine)
> {
> struct drm_i915_private *dev_priv = engine->i915;
> @@ -1692,8 +1730,8 @@ static void reset_irq(struct intel_engine_cs *engine)
> clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> }
>
> -static void reset_common_ring(struct intel_engine_cs *engine,
> - struct i915_request *request)
> +static void execlists_reset(struct intel_engine_cs *engine,
> + struct i915_request *request)
> {
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct intel_context *ce;
> @@ -1766,6 +1804,11 @@ static void reset_common_ring(struct intel_engine_cs *engine,
> unwind_wa_tail(request);
> }
>
> +static void execlists_reset_finish(struct intel_engine_cs *engine)
> +{
> + tasklet_enable(&engine->execlists.tasklet);
> +}
> +
> static int intel_logical_ring_emit_pdps(struct i915_request *rq)
> {
> struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
> @@ -2090,7 +2133,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
> {
> /* Default vfuncs which can be overriden by each engine. */
> engine->init_hw = gen8_init_common_ring;
> - engine->reset_hw = reset_common_ring;
> +
> + engine->reset.prepare = execlists_reset_prepare;
> + engine->reset.reset = execlists_reset;
> + engine->reset.finish = execlists_reset_finish;
>
> engine->context_pin = execlists_context_pin;
> engine->context_unpin = execlists_context_unpin;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 04d9d9a946a7..eebcc877ef60 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -530,8 +530,16 @@ static int init_ring_common(struct intel_engine_cs *engine)
> return ret;
> }
>
> -static void reset_ring_common(struct intel_engine_cs *engine,
> - struct i915_request *request)
> +static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
> +{
> + if (engine->irq_seqno_barrier)
> + engine->irq_seqno_barrier(engine);
> +
> + return i915_gem_find_active_request(engine);
> +}
> +
> +static void reset_ring(struct intel_engine_cs *engine,
> + struct i915_request *request)
> {
> /*
> * RC6 must be prevented until the reset is complete and the engine
> @@ -595,6 +603,10 @@ static void reset_ring_common(struct intel_engine_cs *engine,
> }
> }
>
> +static void reset_finish(struct intel_engine_cs *engine)
> +{
> +}
> +
> static int intel_rcs_ctx_init(struct i915_request *rq)
> {
> int ret;
> @@ -1987,7 +1999,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
> intel_ring_init_semaphores(dev_priv, engine);
>
> engine->init_hw = init_ring_common;
> - engine->reset_hw = reset_ring_common;
> + engine->reset.prepare = reset_prepare;
> + engine->reset.reset = reset_ring;
> + engine->reset.finish = reset_finish;
>
> engine->context_pin = intel_ring_context_pin;
> engine->context_unpin = intel_ring_context_unpin;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 1f50727a5ddb..e2681303ce21 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -418,8 +418,13 @@ struct intel_engine_cs {
> void (*irq_disable)(struct intel_engine_cs *engine);
>
> int (*init_hw)(struct intel_engine_cs *engine);
> - void (*reset_hw)(struct intel_engine_cs *engine,
> - struct i915_request *rq);
> +
> + struct {
> + struct i915_request *(*prepare)(struct intel_engine_cs *engine);
> + void (*reset)(struct intel_engine_cs *engine,
> + struct i915_request *rq);
> + void (*finish)(struct intel_engine_cs *engine);
> + } reset;
>
> void (*park)(struct intel_engine_cs *engine);
> void (*unpark)(struct intel_engine_cs *engine);
> --
> 2.16.2
>
Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 4/5] drm/i915: Split execlists/guc reset preparations
2018-03-20 0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset preparations Chris Wilson
@ 2018-03-22 15:28 ` Jeff McGee
0 siblings, 0 replies; 19+ messages in thread
From: Jeff McGee @ 2018-03-22 15:28 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, Mar 20, 2018 at 12:18:47AM +0000, Chris Wilson wrote:
> In the next patch, we will make the execlists reset prepare callback
> take into account preemption by flushing the context-switch handler.
> This is not applicable to the GuC submission backend, so split the two
> into their own backend callbacks.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> CC: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
> drivers/gpu/drm/i915/intel_guc_submission.c | 39 +++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.c | 11 +-------
> 2 files changed, 40 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 207cda062626..779c8d3156e5 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -776,6 +776,44 @@ static void guc_submission_tasklet(unsigned long data)
> guc_dequeue(engine);
> }
>
> +static struct i915_request *
> +guc_reset_prepare(struct intel_engine_cs *engine)
> +{
> + struct intel_engine_execlists * const execlists = &engine->execlists;
> +
> + /*
> + * Prevent request submission to the hardware until we have
> + * completed the reset in i915_gem_reset_finish(). If a request
> + * is completed by one engine, it may then queue a request
> + * to a second via its execlists->tasklet *just* as we are
> + * calling engine->init_hw() and also writing the ELSP.
> + * Turning off the execlists->tasklet until the reset is over
> + * prevents the race.
> + *
> + * Note that this needs to be a single atomic operation on the
> + * tasklet (flush existing tasks, prevent new tasks) to prevent
> + * a race between reset and set-wedged. It is not, so we do the best
> + * we can atm and make sure we don't lock the machine up in the more
> + * common case of recursively being called from set-wedged from inside
> + * i915_reset.
> + */
> + if (!atomic_read(&execlists->tasklet.count))
> + tasklet_kill(&execlists->tasklet);
> + tasklet_disable(&execlists->tasklet);
> +
> + /*
> + * We're using worker to queue preemption requests from the tasklet in
> + * GuC submission mode.
> + * Even though tasklet was disabled, we may still have a worker queued.
> + * Let's make sure that all workers scheduled before disabling the
> + * tasklet are completed before continuing with the reset.
> + */
> + if (engine->i915->guc.preempt_wq)
> + flush_workqueue(engine->i915->guc.preempt_wq);
> +
> + return i915_gem_find_active_request(engine);
> +}
> +
> /*
> * Everything below here is concerned with setup & teardown, and is
> * therefore not part of the somewhat time-critical batch-submission
> @@ -1235,6 +1273,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
> &engine->execlists;
>
> execlists->tasklet.func = guc_submission_tasklet;
> + engine->reset.prepare = guc_reset_prepare;
> engine->park = guc_submission_park;
> engine->unpark = guc_submission_unpark;
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f662a9524233..e5a3ffbc273a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1688,16 +1688,6 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> tasklet_kill(&execlists->tasklet);
> tasklet_disable(&execlists->tasklet);
>
> - /*
> - * We're using worker to queue preemption requests from the tasklet in
> - * GuC submission mode.
> - * Even though tasklet was disabled, we may still have a worker queued.
> - * Let's make sure that all workers scheduled before disabling the
> - * tasklet are completed before continuing with the reset.
> - */
> - if (engine->i915->guc.preempt_wq)
> - flush_workqueue(engine->i915->guc.preempt_wq);
> -
> return i915_gem_find_active_request(engine);
> }
>
> @@ -2115,6 +2105,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
> engine->cancel_requests = execlists_cancel_requests;
> engine->schedule = execlists_schedule;
> engine->execlists.tasklet.func = execlists_submission_tasklet;
> + engine->reset.prepare = execlists_reset_prepare;
>
> engine->park = NULL;
> engine->unpark = NULL;
> --
> 2.16.2
>
Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset
2018-03-22 15:14 ` Jeff McGee
@ 2018-03-26 11:28 ` Chris Wilson
0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2018-03-26 11:28 UTC (permalink / raw)
To: Jeff McGee; +Cc: intel-gfx
Quoting Jeff McGee (2018-03-22 15:14:01)
> On Tue, Mar 20, 2018 at 05:17:34PM -0700, Jeff McGee wrote:
> > On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote:
> > > Catch up with the inflight CSB events, after disabling the tasklet
> > > before deciding which request was truly guilty of hanging the GPU.
> > >
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > > CC: Michel Thierry <michel.thierry@intel.com>
> > > Cc: Jeff McGee <jeff.mcgee@intel.com>
> > > ---
> > > drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
> > > 1 file changed, 197 insertions(+), 158 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > > index e5a3ffbc273a..beb81f13a3cc 100644
> > > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > > @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> > > local_irq_restore(flags);
> > > }
> > >
> > > -/*
> > > - * Check the unread Context Status Buffers and manage the submission of new
> > > - * contexts to the ELSP accordingly.
> > > - */
> > > -static void execlists_submission_tasklet(unsigned long data)
> > > +static void process_csb(struct intel_engine_cs *engine)
> > > {
> > > - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> > > struct intel_engine_execlists * const execlists = &engine->execlists;
> > > struct execlist_port * const port = execlists->port;
> > > - struct drm_i915_private *dev_priv = engine->i915;
> > > + struct drm_i915_private *i915 = engine->i915;
> > > + unsigned int head, tail;
> > > + const u32 *buf;
> > > bool fw = false;
> > >
> > > - /* We can skip acquiring intel_runtime_pm_get() here as it was taken
> > > - * on our behalf by the request (see i915_gem_mark_busy()) and it will
> > > - * not be relinquished until the device is idle (see
> > > - * i915_gem_idle_work_handler()). As a precaution, we make sure
> > > - * that all ELSP are drained i.e. we have processed the CSB,
> > > - * before allowing ourselves to idle and calling intel_runtime_pm_put().
> > > - */
> > > - GEM_BUG_ON(!dev_priv->gt.awake);
> > > + if (unlikely(execlists->csb_use_mmio)) {
> > > + buf = (u32 * __force)
> > > + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > > + execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> > > + } else {
> > > + /* The HWSP contains a (cacheable) mirror of the CSB */
> > > + buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > > + }
> > >
> > > - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
> > > - * imposing the cost of a locked atomic transaction when submitting a
> > > - * new request (outside of the context-switch interrupt).
> > > + /*
> > > + * The write will be ordered by the uncached read (itself
> > > + * a memory barrier), so we do not need another in the form
> > > + * of a locked instruction. The race between the interrupt
> > > + * handler and the split test/clear is harmless as we order
> > > + * our clear before the CSB read. If the interrupt arrived
> > > + * first between the test and the clear, we read the updated
> > > + * CSB and clear the bit. If the interrupt arrives as we read
> > > + * the CSB or later (i.e. after we had cleared the bit) the bit
> > > + * is set and we do a new loop.
> > > */
> > > - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> > > - /* The HWSP contains a (cacheable) mirror of the CSB */
> > > - const u32 *buf =
> > > - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> > > - unsigned int head, tail;
> > > -
> > > - if (unlikely(execlists->csb_use_mmio)) {
> > > - buf = (u32 * __force)
> > > - (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> > > - execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> > > - }
> > > + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > > + if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> > > + intel_uncore_forcewake_get(i915, execlists->fw_domains);
> > > + fw = true;
> > > +
> > > + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > + tail = GEN8_CSB_WRITE_PTR(head);
> > > + head = GEN8_CSB_READ_PTR(head);
> > > + execlists->csb_head = head;
> > > + } else {
> > > + const int write_idx =
> > > + intel_hws_csb_write_index(i915) -
> > > + I915_HWS_CSB_BUF0_INDEX;
> > > +
> > > + head = execlists->csb_head;
> > > + tail = READ_ONCE(buf[write_idx]);
> > > + }
> > > + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > > + engine->name,
> > > + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > > + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > > +
> > > + while (head != tail) {
> > > + struct i915_request *rq;
> > > + unsigned int status;
> > > + unsigned int count;
> > > +
> > > + if (++head == GEN8_CSB_ENTRIES)
> > > + head = 0;
> > >
> > > - /* The write will be ordered by the uncached read (itself
> > > - * a memory barrier), so we do not need another in the form
> > > - * of a locked instruction. The race between the interrupt
> > > - * handler and the split test/clear is harmless as we order
> > > - * our clear before the CSB read. If the interrupt arrived
> > > - * first between the test and the clear, we read the updated
> > > - * CSB and clear the bit. If the interrupt arrives as we read
> > > - * the CSB or later (i.e. after we had cleared the bit) the bit
> > > - * is set and we do a new loop.
> > > + /*
> > > + * We are flying near dragons again.
> > > + *
> > > + * We hold a reference to the request in execlist_port[]
> > > + * but no more than that. We are operating in softirq
> > > + * context and so cannot hold any mutex or sleep. That
> > > + * prevents us stopping the requests we are processing
> > > + * in port[] from being retired simultaneously (the
> > > + * breadcrumb will be complete before we see the
> > > + * context-switch). As we only hold the reference to the
> > > + * request, any pointer chasing underneath the request
> > > + * is subject to a potential use-after-free. Thus we
> > > + * store all of the bookkeeping within port[] as
> > > + * required, and avoid using unguarded pointers beneath
> > > + * request itself. The same applies to the atomic
> > > + * status notifier.
> > > */
> > > - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > > - if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> > > - if (!fw) {
> > > - intel_uncore_forcewake_get(dev_priv,
> > > - execlists->fw_domains);
> > > - fw = true;
> > > - }
> > >
> > > - head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > - tail = GEN8_CSB_WRITE_PTR(head);
> > > - head = GEN8_CSB_READ_PTR(head);
> > > - execlists->csb_head = head;
> > > - } else {
> > > - const int write_idx =
> > > - intel_hws_csb_write_index(dev_priv) -
> > > - I915_HWS_CSB_BUF0_INDEX;
> > > + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> > > + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> > > + engine->name, head,
> > > + status, buf[2*head + 1],
> > > + execlists->active);
> > > +
> > > + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > > + GEN8_CTX_STATUS_PREEMPTED))
> > > + execlists_set_active(execlists,
> > > + EXECLISTS_ACTIVE_HWACK);
> > > + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > > + execlists_clear_active(execlists,
> > > + EXECLISTS_ACTIVE_HWACK);
> > > +
> > > + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > > + continue;
> > >
> > > - head = execlists->csb_head;
> > > - tail = READ_ONCE(buf[write_idx]);
> > > - }
> > > - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> > > - engine->name,
> > > - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> > > - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> > > + /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > > + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> > >
> > > - while (head != tail) {
> > > - struct i915_request *rq;
> > > - unsigned int status;
> > > - unsigned int count;
> > > + if (status & GEN8_CTX_STATUS_COMPLETE &&
> > > + buf[2*head + 1] == execlists->preempt_complete_status) {
> > > + GEM_TRACE("%s preempt-idle\n", engine->name);
> > > + complete_preempt_context(execlists);
> > > + continue;
> > > + }
> > >
> > > - if (++head == GEN8_CSB_ENTRIES)
> > > - head = 0;
> > > + if (status & GEN8_CTX_STATUS_PREEMPTED &&
> > > + execlists_is_active(execlists,
> > > + EXECLISTS_ACTIVE_PREEMPT))
> > > + continue;
> > >
> > > - /* We are flying near dragons again.
> > > - *
> > > - * We hold a reference to the request in execlist_port[]
> > > - * but no more than that. We are operating in softirq
> > > - * context and so cannot hold any mutex or sleep. That
> > > - * prevents us stopping the requests we are processing
> > > - * in port[] from being retired simultaneously (the
> > > - * breadcrumb will be complete before we see the
> > > - * context-switch). As we only hold the reference to the
> > > - * request, any pointer chasing underneath the request
> > > - * is subject to a potential use-after-free. Thus we
> > > - * store all of the bookkeeping within port[] as
> > > - * required, and avoid using unguarded pointers beneath
> > > - * request itself. The same applies to the atomic
> > > - * status notifier.
> > > - */
> > > + GEM_BUG_ON(!execlists_is_active(execlists,
> > > + EXECLISTS_ACTIVE_USER));
> > >
> > > - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> > > - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> > > - engine->name, head,
> > > - status, buf[2*head + 1],
> > > - execlists->active);
> > > -
> > > - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> > > - GEN8_CTX_STATUS_PREEMPTED))
> > > - execlists_set_active(execlists,
> > > - EXECLISTS_ACTIVE_HWACK);
> > > - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> > > - execlists_clear_active(execlists,
> > > - EXECLISTS_ACTIVE_HWACK);
> > > -
> > > - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> > > - continue;
> > > + rq = port_unpack(port, &count);
> > > + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> > > + engine->name,
> > > + port->context_id, count,
> > > + rq ? rq->global_seqno : 0,
> > > + rq ? rq_prio(rq) : 0);
> > > +
> > > + /* Check the context/desc id for this event matches */
> > > + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> > > +
> > > + GEM_BUG_ON(count == 0);
> > > + if (--count == 0) {
> > > + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> > > + GEM_BUG_ON(port_isset(&port[1]) &&
> > > + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> > > + GEM_BUG_ON(!i915_request_completed(rq));
> > > + execlists_context_schedule_out(rq);
> > > + trace_i915_request_out(rq);
> > > + i915_request_put(rq);
> > > +
> > > + GEM_TRACE("%s completed ctx=%d\n",
> > > + engine->name, port->context_id);
> > > +
> > > + execlists_port_complete(execlists, port);
> > > + } else {
> > > + port_set(port, port_pack(rq, count));
> > > + }
> > >
> > > - /* We should never get a COMPLETED | IDLE_ACTIVE! */
> > > - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> > > + /* After the final element, the hw should be idle */
> > > + GEM_BUG_ON(port_count(port) == 0 &&
> > > + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> > > + if (port_count(port) == 0)
> > > + execlists_clear_active(execlists,
> > > + EXECLISTS_ACTIVE_USER);
> > > + }
> > >
> > > - if (status & GEN8_CTX_STATUS_COMPLETE &&
> > > - buf[2*head + 1] == execlists->preempt_complete_status) {
> > > - GEM_TRACE("%s preempt-idle\n", engine->name);
> > > - complete_preempt_context(execlists);
> > > - continue;
> > > - }
> > > + if (head != execlists->csb_head) {
> > > + execlists->csb_head = head;
> > > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > > + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > + }
> > >
> > > - if (status & GEN8_CTX_STATUS_PREEMPTED &&
> > > - execlists_is_active(execlists,
> > > - EXECLISTS_ACTIVE_PREEMPT))
> > > - continue;
> > > + if (unlikely(fw))
> > > + intel_uncore_forcewake_put(i915, execlists->fw_domains);
> > > +}
> > >
> > > - GEM_BUG_ON(!execlists_is_active(execlists,
> > > - EXECLISTS_ACTIVE_USER));
> > > -
> > > - rq = port_unpack(port, &count);
> > > - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
> > > - engine->name,
> > > - port->context_id, count,
> > > - rq ? rq->global_seqno : 0,
> > > - rq ? rq_prio(rq) : 0);
> > > -
> > > - /* Check the context/desc id for this event matches */
> > > - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> > > -
> > > - GEM_BUG_ON(count == 0);
> > > - if (--count == 0) {
> > > - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> > > - GEM_BUG_ON(port_isset(&port[1]) &&
> > > - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> > > - GEM_BUG_ON(!i915_request_completed(rq));
> > > - execlists_context_schedule_out(rq);
> > > - trace_i915_request_out(rq);
> > > - i915_request_put(rq);
> > > -
> > > - GEM_TRACE("%s completed ctx=%d\n",
> > > - engine->name, port->context_id);
> > > -
> > > - execlists_port_complete(execlists, port);
> > > - } else {
> > > - port_set(port, port_pack(rq, count));
> > > - }
> > > +/*
> > > + * Check the unread Context Status Buffers and manage the submission of new
> > > + * contexts to the ELSP accordingly.
> > > + */
> > > +static void execlists_submission_tasklet(unsigned long data)
> > > +{
> > > + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
> > >
> > > - /* After the final element, the hw should be idle */
> > > - GEM_BUG_ON(port_count(port) == 0 &&
> > > - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> > > - if (port_count(port) == 0)
> > > - execlists_clear_active(execlists,
> > > - EXECLISTS_ACTIVE_USER);
> > > - }
> > > + /*
> > > + * We can skip acquiring intel_runtime_pm_get() here as it was taken
> > > + * on our behalf by the request (see i915_gem_mark_busy()) and it will
> > > + * not be relinquished until the device is idle (see
> > > + * i915_gem_idle_work_handler()). As a precaution, we make sure
> > > + * that all ELSP are drained i.e. we have processed the CSB,
> > > + * before allowing ourselves to idle and calling intel_runtime_pm_put().
> > > + */
> > > + GEM_BUG_ON(!engine->i915->gt.awake);
> > >
> > > - if (head != execlists->csb_head) {
> > > - execlists->csb_head = head;
> > > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> > > - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> > > - }
> > > - }
> > > + /*
> > > + * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> > > + * imposing the cost of a locked atomic transaction when submitting a
> > > + * new request (outside of the context-switch interrupt).
> > > + */
> > > + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> > This was a 'while' before refactoring. Shouldn't it still be in order to catch
> > new irq_posted during processing?
> >
> > BTW, I have revised and rebased the force preemption patches on drm-tip with
> > this and the other related patches you have proposed. I just started running
> > my IGT test that covers the innocent context on port[1] scenario that we
> > discussed on IRC. Getting a sporadic failure but need more time to root cause.
> > Will update soon.
> >
> > > + process_csb(engine);
> > >
> > > - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> > > + if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> > > execlists_dequeue(engine);
> > > -
> > > - if (fw)
> > > - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> > > }
> > >
> > > static void queue_request(struct intel_engine_cs *engine,
> > > @@ -1667,6 +1673,7 @@ static struct i915_request *
> > > execlists_reset_prepare(struct intel_engine_cs *engine)
> > > {
> > > struct intel_engine_execlists * const execlists = &engine->execlists;
> > > + struct i915_request *request, *active;
> > >
> > > /*
> > > * Prevent request submission to the hardware until we have
> > > @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> > > tasklet_kill(&execlists->tasklet);
> > > tasklet_disable(&execlists->tasklet);
> > >
> > > - return i915_gem_find_active_request(engine);
> > > + /*
> > > + * We want to flush the pending context switches, having disabled
> > > + * the tasklet above, we can assume exclusive access to the execlists.
> > > + * This allows us to catch up with an inflight preemption event,
> > > + * and avoid blaming an innocent request if the stall was due to the
> > > + * preemption itself.
> > > + */
> > > + process_csb(engine);
> > > +
> > > + /*
> > > + * The last active request can then be no later than the last request
> > > + * now in ELSP[0]. So search backwards from there, so that if the GPU
> > > + * has advanced beyond the last CSB update, it will be pardoned.
> > > + */
> > > + active = NULL;
> > > + request = port_request(execlists->port);
> > > + if (request) {
> > > + unsigned long flags;
> > > +
> > > + spin_lock_irqsave(&engine->timeline->lock, flags);
> > > + list_for_each_entry_from_reverse(request,
> > > + &engine->timeline->requests,
> > > + link) {
> > > + if (__i915_request_completed(request,
> > > + request->global_seqno))
> > > + break;
> > > +
> > > + active = request;
> > > + }
> > > + spin_unlock_irqrestore(&engine->timeline->lock, flags);
> > > + }
> > > +
> > > + return active;
>
> Now we can see an abort of the reset if process_csb() processes a completed
> preemption. So we need to kick the tasklet to get a dequeue of the waiting
> request to happen. Currently we only kick if needed in gen8_init_common_ring
> which is skipped if we abort the reset.
Yes, it is imperative that the tasklet be disabled and process_csb() be
manually flushed in the preemption timeout (because of ksoftirqd we may
not have run the submission tasklet at all before the timer expires).
Hence the desire to split out process_csb().
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2018-03-26 11:28 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-20 0:18 [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Chris Wilson
2018-03-20 0:18 ` [PATCH 2/5] drm/i915/execlists: Refactor out complete_preempt_context() Chris Wilson
2018-03-22 15:26 ` Jeff McGee
2018-03-20 0:18 ` [PATCH 3/5] drm/i915: Move engine reset prepare/finish to backends Chris Wilson
2018-03-22 15:28 ` Jeff McGee
2018-03-20 0:18 ` [PATCH 4/5] drm/i915: Split execlists/guc reset prepartions Chris Wilson
2018-03-22 15:28 ` Jeff McGee
2018-03-20 0:18 ` [PATCH 5/5] drm/i915/execlists: Flush pending preemption events during reset Chris Wilson
2018-03-21 0:17 ` Jeff McGee
2018-03-22 15:14 ` Jeff McGee
2018-03-26 11:28 ` Chris Wilson
2018-03-20 0:39 ` [PATCH 1/5] drm/i915: Add control flags to i915_handle_error() Michel Thierry
2018-03-20 0:44 ` Chris Wilson
2018-03-20 0:56 ` Michel Thierry
2018-03-20 1:09 ` Chris Wilson
2018-03-20 1:24 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] " Patchwork
2018-03-20 1:25 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-03-20 1:41 ` ✓ Fi.CI.BAT: success " Patchwork
2018-03-20 6:45 ` ✗ Fi.CI.IGT: failure " Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.