* [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load
@ 2017-05-15 16:35 Daniele Ceraolo Spurio
2017-05-15 16:53 ` ✓ Fi.CI.BAT: success for drm/i915/guc: capture GuC logs if FW fails to load (rev4) Patchwork
2017-05-15 17:41 ` [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load Michal Wajdeczko
0 siblings, 2 replies; 5+ messages in thread
From: Daniele Ceraolo Spurio @ 2017-05-15 16:35 UTC (permalink / raw)
To: intel-gfx
We're currently deleting the GuC logs if the FW fails to load, but those
are still useful to understand why the loading failed. Keeping the
object around allows us to access them after driver load is completed.
v2: keep the object around instead of using kernel memory (chris)
don't store the logs in the gpu_error struct (Chris)
add a check on guc_log_level to avoid snapshotting empty logs
v3: use separate debugfs for error log (Chris)
v4: rebased
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 35 ++++++++++++++++++++++-------------
drivers/gpu/drm/i915/i915_drv.c | 3 +++
drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++
drivers/gpu/drm/i915/intel_uc.c | 7 +++++--
drivers/gpu/drm/i915/intel_uc.h | 5 +++++
5 files changed, 52 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bd9abef..d9a5bd8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2607,27 +2607,35 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
static int i915_guc_log_dump(struct seq_file *m, void *data)
{
- struct drm_i915_private *dev_priv = node_to_i915(m->private);
+ struct drm_info_node *node = m->private;
+ struct drm_i915_private *dev_priv = node_to_i915(node);
+ bool dump_err_log = !!node->info_ent->data;
struct drm_i915_gem_object *obj;
- int i = 0, pg;
+ u32 *log;
+ int i = 0;
- if (!dev_priv->guc.log.vma)
+ if (!dump_err_log && dev_priv->guc.log.vma)
+ obj = dev_priv->guc.log.vma->obj;
+ else if (dump_err_log && dev_priv->guc.err_load_log)
+ obj = dev_priv->guc.err_load_log;
+ else
return 0;
- obj = dev_priv->guc.log.vma->obj;
- for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
- u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
-
- for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
- seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
- *(log + i), *(log + i + 1),
- *(log + i + 2), *(log + i + 3));
-
- kunmap_atomic(log);
+ log = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(log)) {
+ DRM_ERROR("Failed to pin guc_log object\n");
+ return PTR_ERR(log);
}
+ for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+ seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+ *(log + i), *(log + i + 1),
+ *(log + i + 2), *(log + i + 3));
+
seq_putc(m, '\n');
+ i915_gem_object_unpin_map(obj);
+
return 0;
}
@@ -4811,6 +4819,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
{"i915_guc_info", i915_guc_info, 0},
{"i915_guc_load_status", i915_guc_load_status_info, 0},
{"i915_guc_log_dump", i915_guc_log_dump, 0},
+ {"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1},
{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
{"i915_huc_load_status", i915_huc_load_status_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 72fb47a..d309165 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1363,6 +1363,9 @@ void i915_driver_unload(struct drm_device *dev)
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
i915_reset_error_state(dev_priv);
+ /* release GuC error log (if any) */
+ i915_guc_load_error_log_free(&dev_priv->guc);
+
/* Flush any outstanding unpin_work. */
drain_workqueue(dev_priv->wq);
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 16d3b87..691da42 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
guc_log_runtime_destroy(&dev_priv->guc);
mutex_unlock(&dev_priv->drm.struct_mutex);
}
+
+void i915_guc_load_error_log_capture(struct intel_guc *guc)
+{
+ if (!guc->log.vma || i915.guc_log_level < 0)
+ return;
+
+ if (!guc->err_load_log)
+ guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
+
+ return;
+}
+
+void i915_guc_load_error_log_free(struct intel_guc *guc)
+{
+ if (guc->err_load_log)
+ i915_gem_object_put(guc->err_load_log);
+}
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 07c5658..3669b57 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -309,6 +309,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
guc_disable_communication(guc);
gen9_reset_guc_interrupts(dev_priv);
+ i915_guc_load_error_log_free(guc);
/* We need to notify the guc whenever we change the GGTT */
i915_ggtt_enable_guc(dev_priv);
@@ -355,11 +356,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
/* Did we succeded or run out of retries? */
if (ret)
- goto err_submission;
+ goto err_log_capture;
ret = guc_enable_communication(guc);
if (ret)
- goto err_submission;
+ goto err_log_capture;
intel_guc_auth_huc(dev_priv);
if (i915.enable_guc_submission) {
@@ -385,6 +386,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
err_interrupts:
guc_disable_communication(guc);
gen9_disable_guc_interrupts(dev_priv);
+err_log_capture:
+ i915_guc_load_error_log_capture(guc);
err_submission:
if (i915.enable_guc_submission)
i915_guc_submission_fini(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 7618b71..d2ed030 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -220,6 +220,9 @@ struct intel_guc {
/* GuC's FW specific notify function */
void (*notify)(struct intel_guc *guc);
+
+ /* Log snapshot if GuC errors during load */
+ struct drm_i915_gem_object *err_load_log;
};
struct intel_huc {
@@ -272,6 +275,8 @@ static inline void intel_guc_notify(struct intel_guc *guc)
int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
void i915_guc_log_register(struct drm_i915_private *dev_priv);
void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
+void i915_guc_load_error_log_capture(struct intel_guc *guc);
+void i915_guc_load_error_log_free(struct intel_guc *guc);
static inline u32 guc_ggtt_offset(struct i915_vma *vma)
{
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915/guc: capture GuC logs if FW fails to load (rev4)
2017-05-15 16:35 [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load Daniele Ceraolo Spurio
@ 2017-05-15 16:53 ` Patchwork
2017-05-15 17:41 ` [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load Michal Wajdeczko
1 sibling, 0 replies; 5+ messages in thread
From: Patchwork @ 2017-05-15 16:53 UTC (permalink / raw)
To: daniele.ceraolospurio; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/guc: capture GuC logs if FW fails to load (rev4)
URL : https://patchwork.freedesktop.org/series/23982/
State : success
== Summary ==
Series 23982v4 drm/i915/guc: capture GuC logs if FW fails to load
https://patchwork.freedesktop.org/api/1.0/series/23982/revisions/4/mbox/
fi-bdw-5557u total:278 pass:267 dwarn:0 dfail:0 fail:0 skip:11 time:451s
fi-bdw-gvtdvm total:278 pass:256 dwarn:8 dfail:0 fail:0 skip:14 time:435s
fi-bsw-n3050 total:278 pass:242 dwarn:0 dfail:0 fail:0 skip:36 time:583s
fi-bxt-j4205 total:278 pass:259 dwarn:0 dfail:0 fail:0 skip:19 time:514s
fi-byt-j1900 total:278 pass:254 dwarn:0 dfail:0 fail:0 skip:24 time:490s
fi-byt-n2820 total:278 pass:250 dwarn:0 dfail:0 fail:0 skip:28 time:486s
fi-hsw-4770 total:278 pass:262 dwarn:0 dfail:0 fail:0 skip:16 time:416s
fi-hsw-4770r total:278 pass:262 dwarn:0 dfail:0 fail:0 skip:16 time:418s
fi-ilk-650 total:278 pass:228 dwarn:0 dfail:0 fail:0 skip:50 time:417s
fi-ivb-3520m total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time:485s
fi-ivb-3770 total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time:465s
fi-kbl-7500u total:278 pass:255 dwarn:5 dfail:0 fail:0 skip:18 time:461s
fi-kbl-7560u total:278 pass:263 dwarn:5 dfail:0 fail:0 skip:10 time:576s
fi-skl-6260u total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time:465s
fi-skl-6700hq total:278 pass:261 dwarn:0 dfail:0 fail:0 skip:17 time:584s
fi-skl-6700k total:278 pass:256 dwarn:4 dfail:0 fail:0 skip:18 time:471s
fi-skl-6770hq total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time:496s
fi-skl-gvtdvm total:278 pass:265 dwarn:0 dfail:0 fail:0 skip:13 time:441s
fi-snb-2520m total:278 pass:250 dwarn:0 dfail:0 fail:0 skip:28 time:544s
fi-snb-2600 total:278 pass:249 dwarn:0 dfail:0 fail:0 skip:29 time:409s
9b25870f9fa4548ec2bb40e42fa28f35db2189e1 drm-tip: 2017y-05m-15d-15h-47m-31s UTC integration manifest
e30ada6 drm/i915/guc: capture GuC logs if FW fails to load
== Logs ==
For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4698/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load
2017-05-15 16:35 [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load Daniele Ceraolo Spurio
2017-05-15 16:53 ` ✓ Fi.CI.BAT: success for drm/i915/guc: capture GuC logs if FW fails to load (rev4) Patchwork
@ 2017-05-15 17:41 ` Michal Wajdeczko
2017-05-15 23:47 ` Daniele Ceraolo Spurio
1 sibling, 1 reply; 5+ messages in thread
From: Michal Wajdeczko @ 2017-05-15 17:41 UTC (permalink / raw)
To: Daniele Ceraolo Spurio; +Cc: intel-gfx
On Mon, May 15, 2017 at 09:35:30AM -0700, Daniele Ceraolo Spurio wrote:
> We're currently deleting the GuC logs if the FW fails to load, but those
> are still useful to understand why the loading failed. Keeping the
> object around allows us to access them after driver load is completed.
>
> v2: keep the object around instead of using kernel memory (chris)
> don't store the logs in the gpu_error struct (Chris)
> add a check on guc_log_level to avoid snapshotting empty logs
>
> v3: use separate debugfs for error log (Chris)
>
> v4: rebased
>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 35 ++++++++++++++++++++++-------------
> drivers/gpu/drm/i915/i915_drv.c | 3 +++
> drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++
> drivers/gpu/drm/i915/intel_uc.c | 7 +++++--
> drivers/gpu/drm/i915/intel_uc.h | 5 +++++
> 5 files changed, 52 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index bd9abef..d9a5bd8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2607,27 +2607,35 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
>
> static int i915_guc_log_dump(struct seq_file *m, void *data)
> {
> - struct drm_i915_private *dev_priv = node_to_i915(m->private);
> + struct drm_info_node *node = m->private;
> + struct drm_i915_private *dev_priv = node_to_i915(node);
> + bool dump_err_log = !!node->info_ent->data;
> struct drm_i915_gem_object *obj;
> - int i = 0, pg;
> + u32 *log;
> + int i = 0;
>
> - if (!dev_priv->guc.log.vma)
> + if (!dump_err_log && dev_priv->guc.log.vma)
> + obj = dev_priv->guc.log.vma->obj;
> + else if (dump_err_log && dev_priv->guc.err_load_log)
> + obj = dev_priv->guc.err_load_log;
> + else
> return 0;
Maybe we can move actual printfs into separate generic function that
will just take desired obj? It will simplify this function and possibly
allow more reuse of that new funcion in the future.
We can even try define two separate small functions with names that
corresponds to the debugfs entry, to follow all other functions
>
> - obj = dev_priv->guc.log.vma->obj;
> - for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
> - u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
> -
> - for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
> - seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> - *(log + i), *(log + i + 1),
> - *(log + i + 2), *(log + i + 3));
> -
> - kunmap_atomic(log);
> + log = i915_gem_object_pin_map(obj, I915_MAP_WC);
> + if (IS_ERR(log)) {
> + DRM_ERROR("Failed to pin guc_log object\n");
> + return PTR_ERR(log);
> }
>
> + for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
> + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> + *(log + i), *(log + i + 1),
> + *(log + i + 2), *(log + i + 3));
> +
> seq_putc(m, '\n');
>
> + i915_gem_object_unpin_map(obj);
> +
> return 0;
> }
>
> @@ -4811,6 +4819,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
> {"i915_guc_info", i915_guc_info, 0},
> {"i915_guc_load_status", i915_guc_load_status_info, 0},
> {"i915_guc_log_dump", i915_guc_log_dump, 0},
> + {"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1},
> {"i915_guc_stage_pool", i915_guc_stage_pool, 0},
> {"i915_huc_load_status", i915_huc_load_status_info, 0},
> {"i915_frequency_info", i915_frequency_info, 0},
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 72fb47a..d309165 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1363,6 +1363,9 @@ void i915_driver_unload(struct drm_device *dev)
> cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
> i915_reset_error_state(dev_priv);
>
> + /* release GuC error log (if any) */
> + i915_guc_load_error_log_free(&dev_priv->guc);
Maybe better place for this call would be in intel_uc_fini_hw() that is
already called from i915_gem_fini().
> +
> /* Flush any outstanding unpin_work. */
> drain_workqueue(dev_priv->wq);
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index 16d3b87..691da42 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
> guc_log_runtime_destroy(&dev_priv->guc);
> mutex_unlock(&dev_priv->drm.struct_mutex);
> }
> +
> +void i915_guc_load_error_log_capture(struct intel_guc *guc)
> +{
> + if (!guc->log.vma || i915.guc_log_level < 0)
> + return;
> +
> + if (!guc->err_load_log)
> + guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
> +
> + return;
> +}
> +
> +void i915_guc_load_error_log_free(struct intel_guc *guc)
> +{
> + if (guc->err_load_log)
> + i915_gem_object_put(guc->err_load_log);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 07c5658..3669b57 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -309,6 +309,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>
> guc_disable_communication(guc);
> gen9_reset_guc_interrupts(dev_priv);
> + i915_guc_load_error_log_free(guc);
>
> /* We need to notify the guc whenever we change the GGTT */
> i915_ggtt_enable_guc(dev_priv);
> @@ -355,11 +356,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>
> /* Did we succeded or run out of retries? */
> if (ret)
> - goto err_submission;
> + goto err_log_capture;
>
> ret = guc_enable_communication(guc);
> if (ret)
> - goto err_submission;
> + goto err_log_capture;
>
> intel_guc_auth_huc(dev_priv);
> if (i915.enable_guc_submission) {
> @@ -385,6 +386,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> err_interrupts:
> guc_disable_communication(guc);
> gen9_disable_guc_interrupts(dev_priv);
> +err_log_capture:
> + i915_guc_load_error_log_capture(guc);
> err_submission:
> if (i915.enable_guc_submission)
> i915_guc_submission_fini(dev_priv);
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index 7618b71..d2ed030 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -220,6 +220,9 @@ struct intel_guc {
>
> /* GuC's FW specific notify function */
> void (*notify)(struct intel_guc *guc);
> +
> + /* Log snapshot if GuC errors during load */
> + struct drm_i915_gem_object *err_load_log;
Hmm, as you put new functions in guc_log.c file, then maybe this member
can be placed in the struct guc_log (as "snapshot") ?
or, if we want to keep "log" struct untouched, then at least move this
member up, closer to the existing fw and log members (as it is related)
and also consider to move capture code to intel_uc.c as static functions
> };
>
> struct intel_huc {
> @@ -272,6 +275,8 @@ static inline void intel_guc_notify(struct intel_guc *guc)
> int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
> void i915_guc_log_register(struct drm_i915_private *dev_priv);
> void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
> +void i915_guc_load_error_log_capture(struct intel_guc *guc);
> +void i915_guc_load_error_log_free(struct intel_guc *guc);
bikeshed: we are trying to always use "intel_guc" prefix for functions that
take "guc" as param. Prefix "i915_" is for functions that take "dev_priv".
-Michal
>
> static inline u32 guc_ggtt_offset(struct i915_vma *vma)
> {
> --
> 1.9.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load
2017-05-15 17:41 ` [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load Michal Wajdeczko
@ 2017-05-15 23:47 ` Daniele Ceraolo Spurio
2017-05-16 17:18 ` Michal Wajdeczko
0 siblings, 1 reply; 5+ messages in thread
From: Daniele Ceraolo Spurio @ 2017-05-15 23:47 UTC (permalink / raw)
To: Michal Wajdeczko; +Cc: intel-gfx
On 15/05/17 10:41, Michal Wajdeczko wrote:
> On Mon, May 15, 2017 at 09:35:30AM -0700, Daniele Ceraolo Spurio wrote:
>> We're currently deleting the GuC logs if the FW fails to load, but those
>> are still useful to understand why the loading failed. Keeping the
>> object around allows us to access them after driver load is completed.
>>
>> v2: keep the object around instead of using kernel memory (chris)
>> don't store the logs in the gpu_error struct (Chris)
>> add a check on guc_log_level to avoid snapshotting empty logs
>>
>> v3: use separate debugfs for error log (Chris)
>>
>> v4: rebased
>>
>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Oscar Mateo <oscar.mateo@intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_debugfs.c | 35 ++++++++++++++++++++++-------------
>> drivers/gpu/drm/i915/i915_drv.c | 3 +++
>> drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++
>> drivers/gpu/drm/i915/intel_uc.c | 7 +++++--
>> drivers/gpu/drm/i915/intel_uc.h | 5 +++++
>> 5 files changed, 52 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index bd9abef..d9a5bd8 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2607,27 +2607,35 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
>>
>> static int i915_guc_log_dump(struct seq_file *m, void *data)
>> {
>> - struct drm_i915_private *dev_priv = node_to_i915(m->private);
>> + struct drm_info_node *node = m->private;
>> + struct drm_i915_private *dev_priv = node_to_i915(node);
>> + bool dump_err_log = !!node->info_ent->data;
>> struct drm_i915_gem_object *obj;
>> - int i = 0, pg;
>> + u32 *log;
>> + int i = 0;
>>
>> - if (!dev_priv->guc.log.vma)
>> + if (!dump_err_log && dev_priv->guc.log.vma)
>> + obj = dev_priv->guc.log.vma->obj;
>> + else if (dump_err_log && dev_priv->guc.err_load_log)
>> + obj = dev_priv->guc.err_load_log;
>> + else
>> return 0;
>
> Maybe we can move actual printfs into separate generic function that
> will just take desired obj? It will simplify this function and possibly
> allow more reuse of that new funcion in the future.
>
The problem here would be that each debugfs function has its own desired
formatting and might only be interested in a section of an object (e.g.
i915_dump_lrc_obj), not sure how much we can reuse. I'd prefer to wait
for an actual use case before splitting it out.
> We can even try define two separate small functions with names that
> corresponds to the debugfs entry, to follow all other functions
>
We already have a case where 2 debugfs entries are pointing to the same
function (i915_gem_gtt and i915_gem_pin_display) and it feels cleaner to
me compared to having 2 one-liner function just to pick between the
different objects. If you're ok with it I'd prefer to instead clean up
the if statement the following way to make the obj selection more evident:
if (dump_err_log)
obj = dev_priv->guc.log.err_load;
else
obj = dev_priv->guc.log.vma->obj;
if (!obj)
return 0;
>>
>> - obj = dev_priv->guc.log.vma->obj;
>> - for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
>> - u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
>> -
>> - for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
>> - seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
>> - *(log + i), *(log + i + 1),
>> - *(log + i + 2), *(log + i + 3));
>> -
>> - kunmap_atomic(log);
>> + log = i915_gem_object_pin_map(obj, I915_MAP_WC);
>> + if (IS_ERR(log)) {
>> + DRM_ERROR("Failed to pin guc_log object\n");
>> + return PTR_ERR(log);
>> }
>>
>> + for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
>> + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
>> + *(log + i), *(log + i + 1),
>> + *(log + i + 2), *(log + i + 3));
>> +
>> seq_putc(m, '\n');
>>
>> + i915_gem_object_unpin_map(obj);
>> +
>> return 0;
>> }
>>
>> @@ -4811,6 +4819,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
>> {"i915_guc_info", i915_guc_info, 0},
>> {"i915_guc_load_status", i915_guc_load_status_info, 0},
>> {"i915_guc_log_dump", i915_guc_log_dump, 0},
>> + {"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1},
>> {"i915_guc_stage_pool", i915_guc_stage_pool, 0},
>> {"i915_huc_load_status", i915_huc_load_status_info, 0},
>> {"i915_frequency_info", i915_frequency_info, 0},
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index 72fb47a..d309165 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -1363,6 +1363,9 @@ void i915_driver_unload(struct drm_device *dev)
>> cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
>> i915_reset_error_state(dev_priv);
>>
>> + /* release GuC error log (if any) */
>> + i915_guc_load_error_log_free(&dev_priv->guc);
>
> Maybe better place for this call would be in intel_uc_fini_hw() that is
> already called from i915_gem_fini().
>
Agreed, this positioning is a remnant of v1. Will move.
>> +
>> /* Flush any outstanding unpin_work. */
>> drain_workqueue(dev_priv->wq);
>>
>> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
>> index 16d3b87..691da42 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_log.c
>> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
>> @@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>> guc_log_runtime_destroy(&dev_priv->guc);
>> mutex_unlock(&dev_priv->drm.struct_mutex);
>> }
>> +
>> +void i915_guc_load_error_log_capture(struct intel_guc *guc)
>> +{
>> + if (!guc->log.vma || i915.guc_log_level < 0)
>> + return;
>> +
>> + if (!guc->err_load_log)
>> + guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
>> +
>> + return;
>> +}
>> +
>> +void i915_guc_load_error_log_free(struct intel_guc *guc)
>> +{
>> + if (guc->err_load_log)
>> + i915_gem_object_put(guc->err_load_log);
>> +}
>> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
>> index 07c5658..3669b57 100644
>> --- a/drivers/gpu/drm/i915/intel_uc.c
>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>> @@ -309,6 +309,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>>
>> guc_disable_communication(guc);
>> gen9_reset_guc_interrupts(dev_priv);
>> + i915_guc_load_error_log_free(guc);
>>
>> /* We need to notify the guc whenever we change the GGTT */
>> i915_ggtt_enable_guc(dev_priv);
>> @@ -355,11 +356,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>>
>> /* Did we succeded or run out of retries? */
>> if (ret)
>> - goto err_submission;
>> + goto err_log_capture;
>>
>> ret = guc_enable_communication(guc);
>> if (ret)
>> - goto err_submission;
>> + goto err_log_capture;
>>
>> intel_guc_auth_huc(dev_priv);
>> if (i915.enable_guc_submission) {
>> @@ -385,6 +386,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>> err_interrupts:
>> guc_disable_communication(guc);
>> gen9_disable_guc_interrupts(dev_priv);
>> +err_log_capture:
>> + i915_guc_load_error_log_capture(guc);
>> err_submission:
>> if (i915.enable_guc_submission)
>> i915_guc_submission_fini(dev_priv);
>> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
>> index 7618b71..d2ed030 100644
>> --- a/drivers/gpu/drm/i915/intel_uc.h
>> +++ b/drivers/gpu/drm/i915/intel_uc.h
>> @@ -220,6 +220,9 @@ struct intel_guc {
>>
>> /* GuC's FW specific notify function */
>> void (*notify)(struct intel_guc *guc);
>> +
>> + /* Log snapshot if GuC errors during load */
>> + struct drm_i915_gem_object *err_load_log;
>
> Hmm, as you put new functions in guc_log.c file, then maybe this member
> can be placed in the struct guc_log (as "snapshot") ?
>
> or, if we want to keep "log" struct untouched, then at least move this
> member up, closer to the existing fw and log members (as it is related)
> and also consider to move capture code to intel_uc.c as static functions
>
I agree it makes more sense to move it inside the log struct to keep
everything log-related in one place, I'll move it to "guc.log.err_load".
For the same reason I'd prefer to keep the functions in intel_guc_log.c.
>> };
>>
>> struct intel_huc {
>> @@ -272,6 +275,8 @@ static inline void intel_guc_notify(struct intel_guc *guc)
>> int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
>> void i915_guc_log_register(struct drm_i915_private *dev_priv);
>> void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
>> +void i915_guc_load_error_log_capture(struct intel_guc *guc);
>> +void i915_guc_load_error_log_free(struct intel_guc *guc);
>
> bikeshed: we are trying to always use "intel_guc" prefix for functions that
> take "guc" as param. Prefix "i915_" is for functions that take "dev_priv".
>
Noted, will update.
Thanks,
Daniele
> -Michal
>
>>
>> static inline u32 guc_ggtt_offset(struct i915_vma *vma)
>> {
>> --
>> 1.9.1
>>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load
2017-05-15 23:47 ` Daniele Ceraolo Spurio
@ 2017-05-16 17:18 ` Michal Wajdeczko
0 siblings, 0 replies; 5+ messages in thread
From: Michal Wajdeczko @ 2017-05-16 17:18 UTC (permalink / raw)
To: Daniele Ceraolo Spurio; +Cc: intel-gfx
On Mon, May 15, 2017 at 04:47:16PM -0700, Daniele Ceraolo Spurio wrote:
>
>
> On 15/05/17 10:41, Michal Wajdeczko wrote:
> > On Mon, May 15, 2017 at 09:35:30AM -0700, Daniele Ceraolo Spurio wrote:
> > > We're currently deleting the GuC logs if the FW fails to load, but those
> > > are still useful to understand why the loading failed. Keeping the
> > > object around allows us to access them after driver load is completed.
> > >
> > > v2: keep the object around instead of using kernel memory (chris)
> > > don't store the logs in the gpu_error struct (Chris)
> > > add a check on guc_log_level to avoid snapshotting empty logs
> > >
> > > v3: use separate debugfs for error log (Chris)
> > >
> > > v4: rebased
> > >
> > > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Oscar Mateo <oscar.mateo@intel.com>
> > > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > > ---
> > > drivers/gpu/drm/i915/i915_debugfs.c | 35 ++++++++++++++++++++++-------------
> > > drivers/gpu/drm/i915/i915_drv.c | 3 +++
> > > drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++
> > > drivers/gpu/drm/i915/intel_uc.c | 7 +++++--
> > > drivers/gpu/drm/i915/intel_uc.h | 5 +++++
> > > 5 files changed, 52 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > > index bd9abef..d9a5bd8 100644
> > > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > > @@ -2607,27 +2607,35 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
> > >
> > > static int i915_guc_log_dump(struct seq_file *m, void *data)
> > > {
> > > - struct drm_i915_private *dev_priv = node_to_i915(m->private);
> > > + struct drm_info_node *node = m->private;
> > > + struct drm_i915_private *dev_priv = node_to_i915(node);
> > > + bool dump_err_log = !!node->info_ent->data;
> > > struct drm_i915_gem_object *obj;
> > > - int i = 0, pg;
> > > + u32 *log;
> > > + int i = 0;
> > >
> > > - if (!dev_priv->guc.log.vma)
> > > + if (!dump_err_log && dev_priv->guc.log.vma)
> > > + obj = dev_priv->guc.log.vma->obj;
> > > + else if (dump_err_log && dev_priv->guc.err_load_log)
> > > + obj = dev_priv->guc.err_load_log;
> > > + else
> > > return 0;
> >
> > Maybe we can move actual printfs into separate generic function that
> > will just take desired obj? It will simplify this function and possibly
> > allow more reuse of that new funcion in the future.
> >
>
> The problem here would be that each debugfs function has its own desired
> formatting and might only be interested in a section of an object (e.g.
> i915_dump_lrc_obj), not sure how much we can reuse. I'd prefer to wait for
> an actual use case before splitting it out.
>
> > We can even try define two separate small functions with names that
> > corresponds to the debugfs entry, to follow all other functions
> >
>
> We already have a case where 2 debugfs entries are pointing to the same
> function (i915_gem_gtt and i915_gem_pin_display) and it feels cleaner to me
> compared to having 2 one-liner function just to pick between the different
> objects. If you're ok with it I'd prefer to instead clean up the if
> statement the following way to make the obj selection more evident:
>
> if (dump_err_log)
> obj = dev_priv->guc.log.err_load;
> else
> obj = dev_priv->guc.log.vma->obj;
>
> if (!obj)
> return 0;
>
Ok, we start with this.
> > >
> > > - obj = dev_priv->guc.log.vma->obj;
> > > - for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
> > > - u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
> > > -
> > > - for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
> > > - seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> > > - *(log + i), *(log + i + 1),
> > > - *(log + i + 2), *(log + i + 3));
> > > -
> > > - kunmap_atomic(log);
> > > + log = i915_gem_object_pin_map(obj, I915_MAP_WC);
> > > + if (IS_ERR(log)) {
> > > + DRM_ERROR("Failed to pin guc_log object\n");
> > > + return PTR_ERR(log);
> > > }
> > >
> > > + for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
> > > + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
> > > + *(log + i), *(log + i + 1),
> > > + *(log + i + 2), *(log + i + 3));
> > > +
> > > seq_putc(m, '\n');
> > >
> > > + i915_gem_object_unpin_map(obj);
> > > +
> > > return 0;
> > > }
> > >
> > > @@ -4811,6 +4819,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file)
> > > {"i915_guc_info", i915_guc_info, 0},
> > > {"i915_guc_load_status", i915_guc_load_status_info, 0},
> > > {"i915_guc_log_dump", i915_guc_log_dump, 0},
> > > + {"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1},
> > > {"i915_guc_stage_pool", i915_guc_stage_pool, 0},
> > > {"i915_huc_load_status", i915_huc_load_status_info, 0},
> > > {"i915_frequency_info", i915_frequency_info, 0},
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > > index 72fb47a..d309165 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.c
> > > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > > @@ -1363,6 +1363,9 @@ void i915_driver_unload(struct drm_device *dev)
> > > cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
> > > i915_reset_error_state(dev_priv);
> > >
> > > + /* release GuC error log (if any) */
> > > + i915_guc_load_error_log_free(&dev_priv->guc);
> >
> > Maybe better place for this call would be in intel_uc_fini_hw() that is
> > already called from i915_gem_fini().
> >
>
> Agreed, this positioning is a remnant of v1. Will move.
>
> > > +
> > > /* Flush any outstanding unpin_work. */
> > > drain_workqueue(dev_priv->wq);
> > >
> > > diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> > > index 16d3b87..691da42 100644
> > > --- a/drivers/gpu/drm/i915/intel_guc_log.c
> > > +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> > > @@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
> > > guc_log_runtime_destroy(&dev_priv->guc);
> > > mutex_unlock(&dev_priv->drm.struct_mutex);
> > > }
> > > +
> > > +void i915_guc_load_error_log_capture(struct intel_guc *guc)
> > > +{
> > > + if (!guc->log.vma || i915.guc_log_level < 0)
> > > + return;
> > > +
> > > + if (!guc->err_load_log)
> > > + guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
> > > +
> > > + return;
> > > +}
> > > +
> > > +void i915_guc_load_error_log_free(struct intel_guc *guc)
> > > +{
> > > + if (guc->err_load_log)
> > > + i915_gem_object_put(guc->err_load_log);
> > > +}
> > > diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> > > index 07c5658..3669b57 100644
> > > --- a/drivers/gpu/drm/i915/intel_uc.c
> > > +++ b/drivers/gpu/drm/i915/intel_uc.c
> > > @@ -309,6 +309,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> > >
> > > guc_disable_communication(guc);
> > > gen9_reset_guc_interrupts(dev_priv);
> > > + i915_guc_load_error_log_free(guc);
> > >
> > > /* We need to notify the guc whenever we change the GGTT */
> > > i915_ggtt_enable_guc(dev_priv);
> > > @@ -355,11 +356,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> > >
> > > /* Did we succeded or run out of retries? */
> > > if (ret)
> > > - goto err_submission;
> > > + goto err_log_capture;
> > >
> > > ret = guc_enable_communication(guc);
> > > if (ret)
> > > - goto err_submission;
> > > + goto err_log_capture;
> > >
> > > intel_guc_auth_huc(dev_priv);
> > > if (i915.enable_guc_submission) {
> > > @@ -385,6 +386,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
> > > err_interrupts:
> > > guc_disable_communication(guc);
> > > gen9_disable_guc_interrupts(dev_priv);
> > > +err_log_capture:
> > > + i915_guc_load_error_log_capture(guc);
> > > err_submission:
> > > if (i915.enable_guc_submission)
> > > i915_guc_submission_fini(dev_priv);
> > > diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> > > index 7618b71..d2ed030 100644
> > > --- a/drivers/gpu/drm/i915/intel_uc.h
> > > +++ b/drivers/gpu/drm/i915/intel_uc.h
> > > @@ -220,6 +220,9 @@ struct intel_guc {
> > >
> > > /* GuC's FW specific notify function */
> > > void (*notify)(struct intel_guc *guc);
> > > +
> > > + /* Log snapshot if GuC errors during load */
> > > + struct drm_i915_gem_object *err_load_log;
> >
> > Hmm, as you put new functions in guc_log.c file, then maybe this member
> > can be placed in the struct guc_log (as "snapshot") ?
> >
> > or, if we want to keep "log" struct untouched, then at least move this
> > member up, closer to the existing fw and log members (as it is related)
> > and also consider to move capture code to intel_uc.c as static functions
> >
>
> I agree it makes more sense to move it inside the log struct to keep
> everything log-related in one place, I'll move it to "guc.log.err_load". For
> the same reason I'd prefer to keep the functions in intel_guc_log.c.
>
But then, maybe they shall be named like this:
void intel_guc_log_capture_err(struct intel_guc *guc);
void intel_guc_log_free_err(struct intel_guc *guc);
Thanks,
Michal
> > > };
> > >
> > > struct intel_huc {
> > > @@ -272,6 +275,8 @@ static inline void intel_guc_notify(struct intel_guc *guc)
> > > int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
> > > void i915_guc_log_register(struct drm_i915_private *dev_priv);
> > > void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
> > > +void i915_guc_load_error_log_capture(struct intel_guc *guc);
> > > +void i915_guc_load_error_log_free(struct intel_guc *guc);
> >
> > bikeshed: we are trying to always use "intel_guc" prefix for functions that
> > take "guc" as param. Prefix "i915_" is for functions that take "dev_priv".
> >
>
> Noted, will update.
>
> Thanks,
> Daniele
>
> > -Michal
> >
> > >
> > > static inline u32 guc_ggtt_offset(struct i915_vma *vma)
> > > {
> > > --
> > > 1.9.1
> > >
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2017-05-16 17:18 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-15 16:35 [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load Daniele Ceraolo Spurio
2017-05-15 16:53 ` ✓ Fi.CI.BAT: success for drm/i915/guc: capture GuC logs if FW fails to load (rev4) Patchwork
2017-05-15 17:41 ` [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load Michal Wajdeczko
2017-05-15 23:47 ` Daniele Ceraolo Spurio
2017-05-16 17:18 ` Michal Wajdeczko
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.