* [PATCH v2] drm/i915: Record more information about the hanging contexts
@ 2017-01-29 9:24 Chris Wilson
2017-01-30 15:24 ` Mika Kuoppala
2017-01-30 17:24 ` ✗ Fi.CI.BAT: warning for drm/i915: Record more information about the hanging contexts (rev2) Patchwork
0 siblings, 2 replies; 3+ messages in thread
From: Chris Wilson @ 2017-01-29 9:24 UTC (permalink / raw)
To: intel-gfx; +Cc: Mika Kuoppala
Include extra information such as the user_handle and hw_id so that
userspace can identify which of their contexts hung, useful if they are
performing self-diagnostics.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 14 +++++--
drivers/gpu/drm/i915/i915_gpu_error.c | 77 ++++++++++++++++++++++-------------
2 files changed, 59 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c1fde816db63..7e7bc4504c94 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -970,6 +970,16 @@ struct drm_i915_error_state {
u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
struct intel_instdone instdone;
+ struct drm_i915_error_context {
+ char comm[TASK_COMM_LEN];
+ int pid;
+ u32 handle;
+ u32 hw_id;
+ int ban_score;
+ int active;
+ int guilty;
+ } context;
+
struct drm_i915_error_object {
u64 gtt_offset;
u64 gtt_size;
@@ -1003,10 +1013,6 @@ struct drm_i915_error_state {
u32 pp_dir_base;
};
} vm_info;
-
- pid_t pid;
- char comm[TASK_COMM_LEN];
- int context_bans;
} engine[I915_NUM_ENGINES];
struct drm_i915_error_buffer {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e5375323eb06..5283fe815a4d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -384,6 +384,15 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
erq->head, erq->tail);
}
+static void error_print_context(struct drm_i915_error_state_buf *m,
+ const char *header,
+ struct drm_i915_error_context *ctx)
+{
+ err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n",
+ header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
+ ctx->ban_score, ctx->guilty, ctx->active);
+}
+
static void error_print_engine(struct drm_i915_error_state_buf *m,
struct drm_i915_error_engine *ee)
{
@@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
+ error_print_context(m, " Active context: ", &ee->context);
}
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -562,12 +572,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
if (error->engine[i].hangcheck_stalled &&
- error->engine[i].pid != -1) {
- err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n",
+ error->engine[i].context.pid) {
+ err_printf(m, "Active process (on ring %s): %s [%d], score %d\n",
engine_str(i),
- error->engine[i].comm,
- error->engine[i].pid,
- error->engine[i].context_bans);
+ error->engine[i].context.comm,
+ error->engine[i].context.pid,
+ error->engine[i].context.ban_score);
}
}
err_printf(m, "Reset count: %u\n", error->reset_count);
@@ -658,11 +668,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
obj = ee->batchbuffer;
if (obj) {
err_puts(m, dev_priv->engine[i]->name);
- if (ee->pid != -1)
- err_printf(m, " (submitted by %s [%d], bans %d)",
- ee->comm,
- ee->pid,
- ee->context_bans);
+ if (ee->context.pid)
+ err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)",
+ ee->context.comm,
+ ee->context.pid,
+ ee->context.handle,
+ ee->context.hw_id,
+ ee->context.ban_score);
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset));
@@ -1267,6 +1279,28 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine,
&ee->execlist[n]);
}
+static void record_context(struct drm_i915_error_context *e,
+ struct i915_gem_context *ctx)
+{
+ if (ctx->pid) {
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(ctx->pid, PIDTYPE_PID);
+ if (task) {
+ strcpy(e->comm, task->comm);
+ e->pid = task->pid;
+ }
+ rcu_read_unlock();
+ }
+
+ e->handle = ctx->user_handle;
+ e->hw_id = ctx->hw_id;
+ e->ban_score = ctx->ban_score;
+ e->guilty = ctx->guilty_count;
+ e->active = ctx->active_count;
+}
+
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
@@ -1281,7 +1315,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct drm_i915_error_engine *ee = &error->engine[i];
struct drm_i915_gem_request *request;
- ee->pid = -1;
ee->engine_id = -1;
if (!engine)
@@ -1296,11 +1329,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
request = i915_gem_find_active_request(engine);
if (request) {
struct intel_ring *ring;
- struct pid *pid;
ee->vm = request->ctx->ppgtt ?
&request->ctx->ppgtt->base : &ggtt->base;
+ record_context(&ee->context, request->ctx);
+
/* We need to copy these to an anonymous buffer
* as the simplest method to avoid being overwritten
* by userspace.
@@ -1318,19 +1352,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
i915_error_object_create(dev_priv,
request->ctx->engine[i].state);
- pid = request->ctx->pid;
- if (pid) {
- struct task_struct *task;
-
- rcu_read_lock();
- task = pid_task(pid, PIDTYPE_PID);
- if (task) {
- strcpy(ee->comm, task->comm);
- ee->pid = task->pid;
- }
- rcu_read_unlock();
- }
-
error->simulated |=
i915_gem_context_no_error_capture(request->ctx);
@@ -1534,12 +1555,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
"GPU HANG: ecode %d:%d:0x%08x",
INTEL_GEN(dev_priv), engine_id, ecode);
- if (engine_id != -1 && error->engine[engine_id].pid != -1)
+ if (engine_id != -1 && error->engine[engine_id].context.pid)
len += scnprintf(error->error_msg + len,
sizeof(error->error_msg) - len,
", in %s [%d]",
- error->engine[engine_id].comm,
- error->engine[engine_id].pid);
+ error->engine[engine_id].context.comm,
+ error->engine[engine_id].context.pid);
scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
", reason: %s, action: %s",
--
2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] drm/i915: Record more information about the hanging contexts
2017-01-29 9:24 [PATCH v2] drm/i915: Record more information about the hanging contexts Chris Wilson
@ 2017-01-30 15:24 ` Mika Kuoppala
2017-01-30 17:24 ` ✗ Fi.CI.BAT: warning for drm/i915: Record more information about the hanging contexts (rev2) Patchwork
1 sibling, 0 replies; 3+ messages in thread
From: Mika Kuoppala @ 2017-01-30 15:24 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Include extra information such as the user_handle and hw_id so that
> userspace can identify which of their contexts hung, useful if they are
> performing self-diagnostics.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 14 +++++--
> drivers/gpu/drm/i915/i915_gpu_error.c | 77 ++++++++++++++++++++++-------------
> 2 files changed, 59 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c1fde816db63..7e7bc4504c94 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -970,6 +970,16 @@ struct drm_i915_error_state {
> u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
> struct intel_instdone instdone;
>
> + struct drm_i915_error_context {
> + char comm[TASK_COMM_LEN];
> + int pid;
s/int/pid_t
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> + u32 handle;
> + u32 hw_id;
> + int ban_score;
> + int active;
> + int guilty;
> + } context;
> +
> struct drm_i915_error_object {
> u64 gtt_offset;
> u64 gtt_size;
> @@ -1003,10 +1013,6 @@ struct drm_i915_error_state {
> u32 pp_dir_base;
> };
> } vm_info;
> -
> - pid_t pid;
> - char comm[TASK_COMM_LEN];
> - int context_bans;
> } engine[I915_NUM_ENGINES];
>
> struct drm_i915_error_buffer {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index e5375323eb06..5283fe815a4d 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -384,6 +384,15 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
> erq->head, erq->tail);
> }
>
> +static void error_print_context(struct drm_i915_error_state_buf *m,
> + const char *header,
> + struct drm_i915_error_context *ctx)
> +{
> + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n",
> + header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
> + ctx->ban_score, ctx->guilty, ctx->active);
> +}
> +
> static void error_print_engine(struct drm_i915_error_state_buf *m,
> struct drm_i915_error_engine *ee)
> {
> @@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
>
> error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
> error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
> + error_print_context(m, " Active context: ", &ee->context);
> }
>
> void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
> @@ -562,12 +572,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>
> for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
> if (error->engine[i].hangcheck_stalled &&
> - error->engine[i].pid != -1) {
> - err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n",
> + error->engine[i].context.pid) {
> + err_printf(m, "Active process (on ring %s): %s [%d], score %d\n",
> engine_str(i),
> - error->engine[i].comm,
> - error->engine[i].pid,
> - error->engine[i].context_bans);
> + error->engine[i].context.comm,
> + error->engine[i].context.pid,
> + error->engine[i].context.ban_score);
> }
> }
> err_printf(m, "Reset count: %u\n", error->reset_count);
> @@ -658,11 +668,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> obj = ee->batchbuffer;
> if (obj) {
> err_puts(m, dev_priv->engine[i]->name);
> - if (ee->pid != -1)
> - err_printf(m, " (submitted by %s [%d], bans %d)",
> - ee->comm,
> - ee->pid,
> - ee->context_bans);
> + if (ee->context.pid)
> + err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)",
> + ee->context.comm,
> + ee->context.pid,
> + ee->context.handle,
> + ee->context.hw_id,
> + ee->context.ban_score);
> err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
> upper_32_bits(obj->gtt_offset),
> lower_32_bits(obj->gtt_offset));
> @@ -1267,6 +1279,28 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine,
> &ee->execlist[n]);
> }
>
> +static void record_context(struct drm_i915_error_context *e,
> + struct i915_gem_context *ctx)
> +{
> + if (ctx->pid) {
> + struct task_struct *task;
> +
> + rcu_read_lock();
> + task = pid_task(ctx->pid, PIDTYPE_PID);
> + if (task) {
> + strcpy(e->comm, task->comm);
> + e->pid = task->pid;
> + }
> + rcu_read_unlock();
> + }
> +
> + e->handle = ctx->user_handle;
> + e->hw_id = ctx->hw_id;
> + e->ban_score = ctx->ban_score;
> + e->guilty = ctx->guilty_count;
> + e->active = ctx->active_count;
> +}
> +
> static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> struct drm_i915_error_state *error)
> {
> @@ -1281,7 +1315,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> struct drm_i915_error_engine *ee = &error->engine[i];
> struct drm_i915_gem_request *request;
>
> - ee->pid = -1;
> ee->engine_id = -1;
>
> if (!engine)
> @@ -1296,11 +1329,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> request = i915_gem_find_active_request(engine);
> if (request) {
> struct intel_ring *ring;
> - struct pid *pid;
>
> ee->vm = request->ctx->ppgtt ?
> &request->ctx->ppgtt->base : &ggtt->base;
>
> + record_context(&ee->context, request->ctx);
> +
> /* We need to copy these to an anonymous buffer
> * as the simplest method to avoid being overwritten
> * by userspace.
> @@ -1318,19 +1352,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> i915_error_object_create(dev_priv,
> request->ctx->engine[i].state);
>
> - pid = request->ctx->pid;
> - if (pid) {
> - struct task_struct *task;
> -
> - rcu_read_lock();
> - task = pid_task(pid, PIDTYPE_PID);
> - if (task) {
> - strcpy(ee->comm, task->comm);
> - ee->pid = task->pid;
> - }
> - rcu_read_unlock();
> - }
> -
> error->simulated |=
> i915_gem_context_no_error_capture(request->ctx);
>
> @@ -1534,12 +1555,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
> "GPU HANG: ecode %d:%d:0x%08x",
> INTEL_GEN(dev_priv), engine_id, ecode);
>
> - if (engine_id != -1 && error->engine[engine_id].pid != -1)
> + if (engine_id != -1 && error->engine[engine_id].context.pid)
> len += scnprintf(error->error_msg + len,
> sizeof(error->error_msg) - len,
> ", in %s [%d]",
> - error->engine[engine_id].comm,
> - error->engine[engine_id].pid);
> + error->engine[engine_id].context.comm,
> + error->engine[engine_id].context.pid);
>
> scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
> ", reason: %s, action: %s",
> --
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
* ✗ Fi.CI.BAT: warning for drm/i915: Record more information about the hanging contexts (rev2)
2017-01-29 9:24 [PATCH v2] drm/i915: Record more information about the hanging contexts Chris Wilson
2017-01-30 15:24 ` Mika Kuoppala
@ 2017-01-30 17:24 ` Patchwork
1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2017-01-30 17:24 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Record more information about the hanging contexts (rev2)
URL : https://patchwork.freedesktop.org/series/18662/
State : warning
== Summary ==
Series 18662v2 drm/i915: Record more information about the hanging contexts
https://patchwork.freedesktop.org/api/1.0/series/18662/revisions/2/mbox/
Test kms_force_connector_basic:
Subgroup force-connector-state:
pass -> DMESG-WARN (fi-snb-2520m)
fi-bdw-5557u total:246 pass:232 dwarn:0 dfail:0 fail:0 skip:14
fi-bsw-n3050 total:246 pass:207 dwarn:0 dfail:0 fail:0 skip:39
fi-bxt-j4205 total:246 pass:224 dwarn:0 dfail:0 fail:0 skip:22
fi-bxt-t5700 total:78 pass:65 dwarn:0 dfail:0 fail:0 skip:12
fi-byt-j1900 total:246 pass:219 dwarn:0 dfail:0 fail:0 skip:27
fi-byt-n2820 total:246 pass:215 dwarn:0 dfail:0 fail:0 skip:31
fi-hsw-4770 total:246 pass:227 dwarn:0 dfail:0 fail:0 skip:19
fi-hsw-4770r total:246 pass:227 dwarn:0 dfail:0 fail:0 skip:19
fi-ivb-3520m total:246 pass:225 dwarn:0 dfail:0 fail:0 skip:21
fi-ivb-3770 total:246 pass:225 dwarn:0 dfail:0 fail:0 skip:21
fi-kbl-7500u total:246 pass:223 dwarn:0 dfail:0 fail:2 skip:21
fi-skl-6260u total:246 pass:233 dwarn:0 dfail:0 fail:0 skip:13
fi-skl-6700hq total:246 pass:226 dwarn:0 dfail:0 fail:0 skip:20
fi-skl-6700k total:246 pass:221 dwarn:4 dfail:0 fail:0 skip:21
fi-skl-6770hq total:246 pass:233 dwarn:0 dfail:0 fail:0 skip:13
fi-snb-2520m total:246 pass:214 dwarn:1 dfail:0 fail:0 skip:31
fi-snb-2600 total:246 pass:214 dwarn:0 dfail:0 fail:0 skip:32
e1cc28133f0474a85da1fc4017686c701b564312 drm-tip: 2017y-01m-30d-15h-50m-59s UTC integration manifest
1e19d96 drm/i915: Record more information about the hanging contexts
== Logs ==
For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3640/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-01-30 17:24 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-29 9:24 [PATCH v2] drm/i915: Record more information about the hanging contexts Chris Wilson
2017-01-30 15:24 ` Mika Kuoppala
2017-01-30 17:24 ` ✗ Fi.CI.BAT: warning for drm/i915: Record more information about the hanging contexts (rev2) Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.