* [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
@ 2017-10-19 20:23 Michal Wajdeczko
2017-10-19 20:47 ` ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3) Patchwork
2017-10-20 9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
0 siblings, 2 replies; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-19 20:23 UTC (permalink / raw)
To: intel-gfx
Include GuC and HuC firmware details in captured error state
to provide additional debug information. To reuse existing
uc firmware pretty printer, introduce new drm-printer variant
that works with our i915_error_state_buf output. Also update
uc firmware pretty printer to accept const input.
v2: don't rely on current caps (Chris)
dump correct fw info (Michal)
v3: simplify capture of custom paths (Chris)
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 4 ++++
drivers/gpu/drm/i915/i915_gpu_error.c | 39 +++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_uc_fw.c | 2 +-
drivers/gpu/drm/i915/intel_uc_fw.h | 2 +-
4 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3c2649c..4d6519b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -911,6 +911,10 @@ struct i915_gpu_state {
struct intel_device_info device_info;
struct i915_params params;
+ /* uC state */
+ struct intel_uc_fw guc_fw;
+ struct intel_uc_fw huc_fw;
+
/* Generic register state */
u32 eir;
u32 pgtbl_er;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 653fb69..21d78eb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@
#include <generated/utsrelease.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>
+#include <drm/drm_print.h>
+
#include "i915_drv.h"
static const char *engine_str(int engine)
@@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
#define err_puts(e, s) i915_error_puts(e, s)
+static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
+{
+ i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
+}
+
+static inline struct drm_printer
+i915_error_printer(struct drm_i915_error_state_buf *e)
+{
+ struct drm_printer p = {
+ .printfn = __i915_printfn_error,
+ .arg = e,
+ };
+ return p;
+}
+
#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
struct compress {
@@ -592,8 +609,10 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
const struct i915_gpu_state *error)
{
+ struct drm_printer p = i915_error_printer(m);
struct drm_i915_private *dev_priv = m->i915;
struct drm_i915_error_object *obj;
+
int i, j;
if (!error) {
@@ -774,6 +793,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_print_capabilities(m, &error->device_info);
err_print_params(m, &error->params);
+ if (error->device_info.has_guc) {
+ intel_uc_fw_dump(&error->guc_fw, &p);
+ intel_uc_fw_dump(&error->huc_fw, &p);
+ }
+
if (m->bytes == 0 && m->err)
return m->err;
@@ -870,6 +894,9 @@ void __i915_gpu_state_free(struct kref *error_ref)
I915_PARAMS_FOR_EACH(FREE);
#undef FREE
+ kfree(error->guc_fw.path);
+ kfree(error->huc_fw.path);
+
kfree(error);
}
@@ -1559,6 +1586,17 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
error->pinned_bo = bo;
}
+static void i915_capture_uc_state(struct drm_i915_private *dev_priv,
+ struct i915_gpu_state *error)
+{
+ error->guc_fw = dev_priv->guc.fw;
+ error->huc_fw = dev_priv->huc.fw;
+
+ /* Make sure to capture custom firmware paths */
+ error->guc_fw.path = kstrdup(error->guc_fw.path, GFP_ATOMIC);
+ error->huc_fw.path = kstrdup(error->huc_fw.path, GFP_ATOMIC);
+}
+
static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
struct i915_gpu_state *error)
{
@@ -1710,6 +1748,7 @@ static int capture(void *data)
I915_PARAMS_FOR_EACH(DUP);
#undef DUP
+ i915_capture_uc_state(error->i915, error);
i915_capture_gen_state(error->i915, error);
i915_capture_reg_state(error->i915, error);
i915_gem_record_fences(error->i915, error);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index 973888e..4bc82d3 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -299,7 +299,7 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
*
* Pretty printer for uC firmware.
*/
-void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p)
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
{
drm_printf(p, "%s firmware: %s\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index 1329036..5394d9d 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -116,6 +116,6 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
int (*xfer)(struct intel_uc_fw *uc_fw,
struct i915_vma *vma));
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
-void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p);
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
#endif
--
2.7.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3)
2017-10-19 20:23 [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Michal Wajdeczko
@ 2017-10-19 20:47 ` Patchwork
2017-10-20 9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
1 sibling, 0 replies; 14+ messages in thread
From: Patchwork @ 2017-10-19 20:47 UTC (permalink / raw)
To: Michal Wajdeczko; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Add Guc/HuC firmware details to error state (rev3)
URL : https://patchwork.freedesktop.org/series/32293/
State : failure
== Summary ==
Series 32293v3 drm/i915: Add Guc/HuC firmware details to error state
https://patchwork.freedesktop.org/api/1.0/series/32293/revisions/3/mbox/
Test chamelium:
Subgroup dp-hpd-fast:
skip -> INCOMPLETE (fi-bdw-gvtdvm) fdo#102332
Test kms_flip:
Subgroup basic-flip-vs-wf_vblank:
incomplete -> PASS (fi-skl-6700hq)
pass -> INCOMPLETE (fi-skl-6770hq)
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
pass -> DMESG-WARN (fi-byt-n2820) fdo#101705
Test drv_module_reload:
Subgroup basic-reload-inject:
dmesg-warn -> INCOMPLETE (fi-cfl-s) fdo#103206
fdo#102332 https://bugs.freedesktop.org/show_bug.cgi?id=102332
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705
fdo#103206 https://bugs.freedesktop.org/show_bug.cgi?id=103206
fi-bdw-5557u total:289 pass:268 dwarn:0 dfail:0 fail:0 skip:21 time:439s
fi-bdw-gvtdvm total:59 pass:0 dwarn:0 dfail:0 fail:0 skip:0
fi-blb-e6850 total:289 pass:223 dwarn:1 dfail:0 fail:0 skip:65 time:374s
fi-bsw-n3050 total:289 pass:243 dwarn:0 dfail:0 fail:0 skip:46 time:525s
fi-bwr-2160 total:289 pass:183 dwarn:0 dfail:0 fail:0 skip:106 time:263s
fi-bxt-dsi total:289 pass:259 dwarn:0 dfail:0 fail:0 skip:30 time:500s
fi-bxt-j4205 total:289 pass:260 dwarn:0 dfail:0 fail:0 skip:29 time:497s
fi-byt-j1900 total:289 pass:253 dwarn:1 dfail:0 fail:0 skip:35 time:494s
fi-byt-n2820 total:289 pass:249 dwarn:1 dfail:0 fail:0 skip:39 time:482s
fi-cfl-s total:288 pass:253 dwarn:3 dfail:0 fail:0 skip:31
fi-elk-e7500 total:289 pass:229 dwarn:0 dfail:0 fail:0 skip:60 time:415s
fi-gdg-551 total:289 pass:178 dwarn:1 dfail:0 fail:1 skip:109 time:250s
fi-glk-1 total:289 pass:261 dwarn:0 dfail:0 fail:0 skip:28 time:581s
fi-hsw-4770 total:289 pass:262 dwarn:0 dfail:0 fail:0 skip:27 time:449s
fi-hsw-4770r total:289 pass:262 dwarn:0 dfail:0 fail:0 skip:27 time:426s
fi-ilk-650 total:289 pass:228 dwarn:0 dfail:0 fail:0 skip:61 time:434s
fi-ivb-3520m total:289 pass:260 dwarn:0 dfail:0 fail:0 skip:29 time:493s
fi-ivb-3770 total:289 pass:260 dwarn:0 dfail:0 fail:0 skip:29 time:462s
fi-kbl-7500u total:289 pass:264 dwarn:1 dfail:0 fail:0 skip:24 time:487s
fi-kbl-7560u total:289 pass:270 dwarn:0 dfail:0 fail:0 skip:19 time:570s
fi-kbl-7567u total:289 pass:269 dwarn:0 dfail:0 fail:0 skip:20 time:473s
fi-kbl-r total:289 pass:262 dwarn:0 dfail:0 fail:0 skip:27 time:582s
fi-pnv-d510 total:289 pass:222 dwarn:1 dfail:0 fail:0 skip:66 time:547s
fi-skl-6260u total:289 pass:269 dwarn:0 dfail:0 fail:0 skip:20 time:452s
fi-skl-6700hq total:289 pass:263 dwarn:0 dfail:0 fail:0 skip:26 time:648s
fi-skl-6700k total:289 pass:265 dwarn:0 dfail:0 fail:0 skip:24 time:521s
fi-skl-6770hq total:219 pass:206 dwarn:0 dfail:0 fail:0 skip:12
fi-skl-gvtdvm total:289 pass:266 dwarn:0 dfail:0 fail:0 skip:23 time:459s
fi-snb-2520m total:289 pass:250 dwarn:0 dfail:0 fail:0 skip:39 time:567s
fi-snb-2600 total:289 pass:249 dwarn:0 dfail:0 fail:0 skip:40 time:422s
87b113da0ae43b97650ddcec6c05ef69469e2107 drm-tip: 2017y-10m-19d-18h-14m-41s UTC integration manifest
e08b27eb1f11 drm/i915: Add Guc/HuC firmware details to error state
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_6114/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-19 20:23 [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Michal Wajdeczko
2017-10-19 20:47 ` ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3) Patchwork
@ 2017-10-20 9:41 ` Joonas Lahtinen
2017-10-20 11:44 ` Chris Wilson
2017-10-20 14:44 ` Michal Wajdeczko
1 sibling, 2 replies; 14+ messages in thread
From: Joonas Lahtinen @ 2017-10-20 9:41 UTC (permalink / raw)
To: Michal Wajdeczko, intel-gfx
On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> Include GuC and HuC firmware details in captured error state
> to provide additional debug information. To reuse existing
> uc firmware pretty printer, introduce new drm-printer variant
> that works with our i915_error_state_buf output. Also update
> uc firmware pretty printer to accept const input.
>
> v2: don't rely on current caps (Chris)
> dump correct fw info (Michal)
> v3: simplify capture of custom paths (Chris)
>
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
A few comments below.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Regards, Joonas
> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> err_print_capabilities(m, &error->device_info);
> err_print_params(m, &error->params);
>
> + if (error->device_info.has_guc) {
> + intel_uc_fw_dump(&error->guc_fw, &p);
> + intel_uc_fw_dump(&error->huc_fw, &p);
> + }
I might use "error->{g,h}uc_fw.path" as the condition, for both
individually. We will have DMC here in the future, too.
> +static void i915_capture_uc_state(struct drm_i915_private *dev_priv,
> + struct i915_gpu_state *error)
> +{
> + error->guc_fw = dev_priv->guc.fw;
> + error->huc_fw = dev_priv->huc.fw;
> +
> + /* Make sure to capture custom firmware paths */
This is again a "what" comment, not "why". And they're not necessarily
the custom ones. The "why" is generic to all error capture funcs.
> + error->guc_fw.path = kstrdup(error->guc_fw.path, GFP_ATOMIC);
> + error->huc_fw.path = kstrdup(error->huc_fw.path, GFP_ATOMIC);
> +}
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
@ 2017-10-20 11:44 ` Chris Wilson
2017-10-20 12:14 ` Michal Wajdeczko
2017-10-20 14:44 ` Michal Wajdeczko
1 sibling, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 11:44 UTC (permalink / raw)
To: Joonas Lahtinen, Michal Wajdeczko, intel-gfx
Quoting Joonas Lahtinen (2017-10-20 10:41:57)
> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> > err_print_capabilities(m, &error->device_info);
> > err_print_params(m, &error->params);
> >
> > + if (error->device_info.has_guc) {
> > + intel_uc_fw_dump(&error->guc_fw, &p);
> > + intel_uc_fw_dump(&error->huc_fw, &p);
> > + }
>
> I might use "error->{g,h}uc_fw.path" as the condition, for both
> individually. We will have DMC here in the future, too.
That's the type of predicate I was looking for. If we can tell when the
fw has been loaded just by looking at the uc_fw struct, please do so.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 11:44 ` Chris Wilson
@ 2017-10-20 12:14 ` Michal Wajdeczko
2017-10-20 12:21 ` Chris Wilson
0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 12:14 UTC (permalink / raw)
To: Joonas Lahtinen, intel-gfx, Chris Wilson
On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
<chris@chris-wilson.co.uk> wrote:
> Quoting Joonas Lahtinen (2017-10-20 10:41:57)
>> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> drm_i915_error_state_buf *m,
>> > err_print_capabilities(m, &error->device_info);
>> > err_print_params(m, &error->params);
>> >
>> > + if (error->device_info.has_guc) {
>> > + intel_uc_fw_dump(&error->guc_fw, &p);
>> > + intel_uc_fw_dump(&error->huc_fw, &p);
>> > + }
>>
>> I might use "error->{g,h}uc_fw.path" as the condition, for both
>> individually. We will have DMC here in the future, too.
>
> That's the type of predicate I was looking for. If we can tell when the
> fw has been loaded just by looking at the uc_fw struct, please do so.
If you want to include uc fw info only when fw was loaded at capture
moment,
then we can look directly at its "load_status" field:
if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
But what if fw was loaded earlier, before error capture is started?
Don't we want to capture whole driver configuration?
Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 12:14 ` Michal Wajdeczko
@ 2017-10-20 12:21 ` Chris Wilson
2017-10-20 12:36 ` Michal Wajdeczko
0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 12:21 UTC (permalink / raw)
To: Michal Wajdeczko, Joonas Lahtinen, intel-gfx
Quoting Michal Wajdeczko (2017-10-20 13:14:45)
> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
> <chris@chris-wilson.co.uk> wrote:
>
> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
> >> drm_i915_error_state_buf *m,
> >> > err_print_capabilities(m, &error->device_info);
> >> > err_print_params(m, &error->params);
> >> >
> >> > + if (error->device_info.has_guc) {
> >> > + intel_uc_fw_dump(&error->guc_fw, &p);
> >> > + intel_uc_fw_dump(&error->huc_fw, &p);
> >> > + }
> >>
> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
> >> individually. We will have DMC here in the future, too.
> >
> > That's the type of predicate I was looking for. If we can tell when the
> > fw has been loaded just by looking at the uc_fw struct, please do so.
>
> If you want to include uc fw info only when fw was loaded at capture
> moment,
> then we can look directly at its "load_status" field:
>
> if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
>
> But what if fw was loaded earlier, before error capture is started?
> Don't we want to capture whole driver configuration?
The goal is to know what the driver/hw was doing at the time of the
capture (which we presume is still identical to the hang).
At this point, we are just asking ourselves what is the most unambiguous
way of printing valid data. We already show has_guc in the output, so it
boils down to what was the uc_fw state. It doesn't need to be
load_status == SUCCESS, as we should show a failed attempt to upload the
fw as well.
So intel_uc_fw.size?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 12:21 ` Chris Wilson
@ 2017-10-20 12:36 ` Michal Wajdeczko
2017-10-20 12:43 ` Chris Wilson
0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 12:36 UTC (permalink / raw)
To: Joonas Lahtinen, intel-gfx, Chris Wilson
On Fri, 20 Oct 2017 14:21:16 +0200, Chris Wilson
<chris@chris-wilson.co.uk> wrote:
> Quoting Michal Wajdeczko (2017-10-20 13:14:45)
>> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
>> <chris@chris-wilson.co.uk> wrote:
>>
>> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
>> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> >> drm_i915_error_state_buf *m,
>> >> > err_print_capabilities(m, &error->device_info);
>> >> > err_print_params(m, &error->params);
>> >> >
>> >> > + if (error->device_info.has_guc) {
>> >> > + intel_uc_fw_dump(&error->guc_fw, &p);
>> >> > + intel_uc_fw_dump(&error->huc_fw, &p);
>> >> > + }
>> >>
>> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
>> >> individually. We will have DMC here in the future, too.
>> >
>> > That's the type of predicate I was looking for. If we can tell when
>> the
>> > fw has been loaded just by looking at the uc_fw struct, please do so.
>>
>> If you want to include uc fw info only when fw was loaded at capture
>> moment,
>> then we can look directly at its "load_status" field:
>>
>> if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
>>
>> But what if fw was loaded earlier, before error capture is started?
>> Don't we want to capture whole driver configuration?
>
> The goal is to know what the driver/hw was doing at the time of the
> capture (which we presume is still identical to the hang).
>
> At this point, we are just asking ourselves what is the most unambiguous
> way of printing valid data. We already show has_guc in the output, so it
> boils down to what was the uc_fw state. It doesn't need to be
> load_status == SUCCESS, as we should show a failed attempt to upload the
> fw as well.
>
> So intel_uc_fw.size?
But then in case of zero size fw you will not get info about load failure
;)
What about leaving here "if (error->device_info.has_guc)" as main guard
for both GuC and HuC fw dumps (note that upcoming series make it explicit
that HuC is always in pair with GuC) but then also add "if (uc_fw->path)"
condition to the uc fw pretty printer to stop printing fw details if no
path was specified (all fields will be void anyway). Then we may get:
has_guc: yes
...
GuC firmware: (null)
HuC firmware: (null)
Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 12:36 ` Michal Wajdeczko
@ 2017-10-20 12:43 ` Chris Wilson
2017-10-20 12:50 ` Michal Wajdeczko
0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 12:43 UTC (permalink / raw)
To: Michal Wajdeczko, Joonas Lahtinen, intel-gfx
Quoting Michal Wajdeczko (2017-10-20 13:36:59)
> On Fri, 20 Oct 2017 14:21:16 +0200, Chris Wilson
> <chris@chris-wilson.co.uk> wrote:
>
> > Quoting Michal Wajdeczko (2017-10-20 13:14:45)
> >> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
> >> <chris@chris-wilson.co.uk> wrote:
> >>
> >> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
> >> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
> >> >> drm_i915_error_state_buf *m,
> >> >> > err_print_capabilities(m, &error->device_info);
> >> >> > err_print_params(m, &error->params);
> >> >> >
> >> >> > + if (error->device_info.has_guc) {
> >> >> > + intel_uc_fw_dump(&error->guc_fw, &p);
> >> >> > + intel_uc_fw_dump(&error->huc_fw, &p);
> >> >> > + }
> >> >>
> >> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
> >> >> individually. We will have DMC here in the future, too.
> >> >
> >> > That's the type of predicate I was looking for. If we can tell when
> >> the
> >> > fw has been loaded just by looking at the uc_fw struct, please do so.
> >>
> >> If you want to include uc fw info only when fw was loaded at capture
> >> moment,
> >> then we can look directly at its "load_status" field:
> >>
> >> if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> >>
> >> But what if fw was loaded earlier, before error capture is started?
> >> Don't we want to capture whole driver configuration?
> >
> > The goal is to know what the driver/hw was doing at the time of the
> > capture (which we presume is still identical to the hang).
> >
> > At this point, we are just asking ourselves what is the most unambiguous
> > way of printing valid data. We already show has_guc in the output, so it
> > boils down to what was the uc_fw state. It doesn't need to be
> > load_status == SUCCESS, as we should show a failed attempt to upload the
> > fw as well.
> >
> > So intel_uc_fw.size?
>
> But then in case of zero size fw you will not get info about load failure
> ;)
Heh, I would just go back and make sure we had a clear well of deducing
when fw was being utilised. Maybe
if (uc_fw.load_status != INTEL_UC_FIRMWARE_NONE)
?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 12:43 ` Chris Wilson
@ 2017-10-20 12:50 ` Michal Wajdeczko
2017-10-20 12:57 ` Chris Wilson
0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 12:50 UTC (permalink / raw)
To: Joonas Lahtinen, intel-gfx, Chris Wilson
On Fri, 20 Oct 2017 14:43:46 +0200, Chris Wilson
<chris@chris-wilson.co.uk> wrote:
> Quoting Michal Wajdeczko (2017-10-20 13:36:59)
>> On Fri, 20 Oct 2017 14:21:16 +0200, Chris Wilson
>> <chris@chris-wilson.co.uk> wrote:
>>
>> > Quoting Michal Wajdeczko (2017-10-20 13:14:45)
>> >> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
>> >> <chris@chris-wilson.co.uk> wrote:
>> >>
>> >> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
>> >> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> >> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> >> >> drm_i915_error_state_buf *m,
>> >> >> > err_print_capabilities(m, &error->device_info);
>> >> >> > err_print_params(m, &error->params);
>> >> >> >
>> >> >> > + if (error->device_info.has_guc) {
>> >> >> > + intel_uc_fw_dump(&error->guc_fw, &p);
>> >> >> > + intel_uc_fw_dump(&error->huc_fw, &p);
>> >> >> > + }
>> >> >>
>> >> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
>> >> >> individually. We will have DMC here in the future, too.
>> >> >
>> >> > That's the type of predicate I was looking for. If we can tell when
>> >> the
>> >> > fw has been loaded just by looking at the uc_fw struct, please do
>> so.
>> >>
>> >> If you want to include uc fw info only when fw was loaded at capture
>> >> moment,
>> >> then we can look directly at its "load_status" field:
>> >>
>> >> if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
>> >>
>> >> But what if fw was loaded earlier, before error capture is started?
>> >> Don't we want to capture whole driver configuration?
>> >
>> > The goal is to know what the driver/hw was doing at the time of the
>> > capture (which we presume is still identical to the hang).
>> >
>> > At this point, we are just asking ourselves what is the most
>> unambiguous
>> > way of printing valid data. We already show has_guc in the output, so
>> it
>> > boils down to what was the uc_fw state. It doesn't need to be
>> > load_status == SUCCESS, as we should show a failed attempt to upload
>> the
>> > fw as well.
>> >
>> > So intel_uc_fw.size?
>>
>> But then in case of zero size fw you will not get info about load
>> failure
>> ;)
>
> Heh, I would just go back and make sure we had a clear well of deducing
> when fw was being utilised. Maybe
>
> if (uc_fw.load_status != INTEL_UC_FIRMWARE_NONE)
>
If we don't care about earlier fw fetch attempts/errors, then ok.
Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 12:50 ` Michal Wajdeczko
@ 2017-10-20 12:57 ` Chris Wilson
0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 12:57 UTC (permalink / raw)
To: Michal Wajdeczko, Joonas Lahtinen, intel-gfx
Quoting Michal Wajdeczko (2017-10-20 13:50:23)
> On Fri, 20 Oct 2017 14:43:46 +0200, Chris Wilson
> <chris@chris-wilson.co.uk> wrote:
> > Heh, I would just go back and make sure we had a clear well of deducing
> > when fw was being utilised. Maybe
> >
> > if (uc_fw.load_status != INTEL_UC_FIRMWARE_NONE)
> >
>
> If we don't care about earlier fw fetch attempts/errors, then ok.
Just since the last reset is about all we aim for. It's a game of trying
to work out what information will be required to debug future hangs. Too
much and it's a needle-in-a-haystack-problem, too little and we have the
round-trip latency in getting a new kernel to where the problem is seen.
And then for the next few years being frustrated at seeing hangs with the
old error state.
I defer what information is required for debugging in the future to
those already attempting it; but I do know that recording what firmware
and what status we left the uc in will always be useful. :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
2017-10-20 11:44 ` Chris Wilson
@ 2017-10-20 14:44 ` Michal Wajdeczko
2017-10-20 14:50 ` Chris Wilson
1 sibling, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 14:44 UTC (permalink / raw)
To: intel-gfx, Joonas Lahtinen
On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen
<joonas.lahtinen@linux.intel.com> wrote:
> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> Include GuC and HuC firmware details in captured error state
>> to provide additional debug information. To reuse existing
>> uc firmware pretty printer, introduce new drm-printer variant
>> that works with our i915_error_state_buf output. Also update
>> uc firmware pretty printer to accept const input.
>>
>> v2: don't rely on current caps (Chris)
>> dump correct fw info (Michal)
>> v3: simplify capture of custom paths (Chris)
>>
>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> A few comments below.
>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> Regards, Joonas
>
>> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> drm_i915_error_state_buf *m,
>> err_print_capabilities(m, &error->device_info);
>> err_print_params(m, &error->params);
>>
>> + if (error->device_info.has_guc) {
>> + intel_uc_fw_dump(&error->guc_fw, &p);
>> + intel_uc_fw_dump(&error->huc_fw, &p);
>> + }
>
> I might use "error->{g,h}uc_fw.path" as the condition, for both
If we use "uc_fw->path" as condition then we have to assume that
lack of uc firmware info is for this reason, and not from corrupted
or incomplete or legacy format error dump ;)
I would prefer to stay with "has_guc" condition here, and trim
output from uc fw printer if path is not set. This way we will
cover all possible scenario (no guc = no fw, no fw = no fw info)
> individually. We will have DMC here in the future, too.
Hmm, while not here, info about DMC fw is already included in
error dump. But in their case reported DMC fw state (loaded flag,
version) is taken directly from dev_priv instead of captured error
state.
Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 14:44 ` Michal Wajdeczko
@ 2017-10-20 14:50 ` Chris Wilson
2017-10-20 14:59 ` Michal Wajdeczko
0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 14:50 UTC (permalink / raw)
To: Michal Wajdeczko, intel-gfx, Joonas Lahtinen
Quoting Michal Wajdeczko (2017-10-20 15:44:02)
> On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com> wrote:
>
> > On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> Include GuC and HuC firmware details in captured error state
> >> to provide additional debug information. To reuse existing
> >> uc firmware pretty printer, introduce new drm-printer variant
> >> that works with our i915_error_state_buf output. Also update
> >> uc firmware pretty printer to accept const input.
> >>
> >> v2: don't rely on current caps (Chris)
> >> dump correct fw info (Michal)
> >> v3: simplify capture of custom paths (Chris)
> >>
> >> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >
> > A few comments below.
> >
> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >
> > Regards, Joonas
> >
> >> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
> >> drm_i915_error_state_buf *m,
> >> err_print_capabilities(m, &error->device_info);
> >> err_print_params(m, &error->params);
> >>
> >> + if (error->device_info.has_guc) {
> >> + intel_uc_fw_dump(&error->guc_fw, &p);
> >> + intel_uc_fw_dump(&error->huc_fw, &p);
> >> + }
> >
> > I might use "error->{g,h}uc_fw.path" as the condition, for both
>
> If we use "uc_fw->path" as condition then we have to assume that
> lack of uc firmware info is for this reason, and not from corrupted
> or incomplete or legacy format error dump ;)
>
> I would prefer to stay with "has_guc" condition here, and trim
> output from uc fw printer if path is not set. This way we will
> cover all possible scenario (no guc = no fw, no fw = no fw info)
I still do not understand the insistence on has_guc. no fw is no fw, no
guc is no guc.
> > individually. We will have DMC here in the future, too.
>
> Hmm, while not here, info about DMC fw is already included in
> error dump. But in their case reported DMC fw state (loaded flag,
> version) is taken directly from dev_priv instead of captured error
> state.
I know. You are not to copy that mistake! Also if you should happen to
feel inclined to rectify the heinous actions, preferably by removing
the dmc entirely, please do so.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 14:50 ` Chris Wilson
@ 2017-10-20 14:59 ` Michal Wajdeczko
2017-10-20 15:14 ` Chris Wilson
0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 14:59 UTC (permalink / raw)
To: intel-gfx, Joonas Lahtinen, Chris Wilson
On Fri, 20 Oct 2017 16:50:10 +0200, Chris Wilson
<chris@chris-wilson.co.uk> wrote:
> Quoting Michal Wajdeczko (2017-10-20 15:44:02)
>> On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen
>> <joonas.lahtinen@linux.intel.com> wrote:
>>
>> > On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> >> Include GuC and HuC firmware details in captured error state
>> >> to provide additional debug information. To reuse existing
>> >> uc firmware pretty printer, introduce new drm-printer variant
>> >> that works with our i915_error_state_buf output. Also update
>> >> uc firmware pretty printer to accept const input.
>> >>
>> >> v2: don't rely on current caps (Chris)
>> >> dump correct fw info (Michal)
>> >> v3: simplify capture of custom paths (Chris)
>> >>
>> >> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
>> >> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> >
>> > A few comments below.
>> >
>> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> >
>> > Regards, Joonas
>> >
>> >> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> >> drm_i915_error_state_buf *m,
>> >> err_print_capabilities(m, &error->device_info);
>> >> err_print_params(m, &error->params);
>> >>
>> >> + if (error->device_info.has_guc) {
>> >> + intel_uc_fw_dump(&error->guc_fw, &p);
>> >> + intel_uc_fw_dump(&error->huc_fw, &p);
>> >> + }
>> >
>> > I might use "error->{g,h}uc_fw.path" as the condition, for both
>>
>> If we use "uc_fw->path" as condition then we have to assume that
>> lack of uc firmware info is for this reason, and not from corrupted
>> or incomplete or legacy format error dump ;)
>>
>> I would prefer to stay with "has_guc" condition here, and trim
>> output from uc fw printer if path is not set. This way we will
>> cover all possible scenario (no guc = no fw, no fw = no fw info)
>
> I still do not understand the insistence on has_guc. no fw is no fw, no
> guc is no guc.
I just wanted to cover the case where we have guc but without fw
(explicitly expressed as null path) while skipping fw info for
platforms without guc. No more guesses. No unwanted noise.
Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
2017-10-20 14:59 ` Michal Wajdeczko
@ 2017-10-20 15:14 ` Chris Wilson
0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 15:14 UTC (permalink / raw)
To: Michal Wajdeczko, intel-gfx, Joonas Lahtinen
Quoting Michal Wajdeczko (2017-10-20 15:59:41)
> On Fri, 20 Oct 2017 16:50:10 +0200, Chris Wilson
> <chris@chris-wilson.co.uk> wrote:
>
> > Quoting Michal Wajdeczko (2017-10-20 15:44:02)
> >> On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen
> >> <joonas.lahtinen@linux.intel.com> wrote:
> >>
> >> > On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> >> Include GuC and HuC firmware details in captured error state
> >> >> to provide additional debug information. To reuse existing
> >> >> uc firmware pretty printer, introduce new drm-printer variant
> >> >> that works with our i915_error_state_buf output. Also update
> >> >> uc firmware pretty printer to accept const input.
> >> >>
> >> >> v2: don't rely on current caps (Chris)
> >> >> dump correct fw info (Michal)
> >> >> v3: simplify capture of custom paths (Chris)
> >> >>
> >> >> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> >> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> >> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> >
> >> > A few comments below.
> >> >
> >> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> >
> >> > Regards, Joonas
> >> >
> >> >> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
> >> >> drm_i915_error_state_buf *m,
> >> >> err_print_capabilities(m, &error->device_info);
> >> >> err_print_params(m, &error->params);
> >> >>
> >> >> + if (error->device_info.has_guc) {
> >> >> + intel_uc_fw_dump(&error->guc_fw, &p);
> >> >> + intel_uc_fw_dump(&error->huc_fw, &p);
> >> >> + }
> >> >
> >> > I might use "error->{g,h}uc_fw.path" as the condition, for both
> >>
> >> If we use "uc_fw->path" as condition then we have to assume that
> >> lack of uc firmware info is for this reason, and not from corrupted
> >> or incomplete or legacy format error dump ;)
> >>
> >> I would prefer to stay with "has_guc" condition here, and trim
> >> output from uc fw printer if path is not set. This way we will
> >> cover all possible scenario (no guc = no fw, no fw = no fw info)
> >
> > I still do not understand the insistence on has_guc. no fw is no fw, no
> > guc is no guc.
>
> I just wanted to cover the case where we have guc but without fw
> (explicitly expressed as null path) while skipping fw info for
> platforms without guc. No more guesses. No unwanted noise.
The first makes sense, but the second? We either used the struct uc_fw
or we didn't. If we did and didn't have guc, that raises a question or
two -- as it should never be the case, but for the error capture it
doesn't have to know, it just prints what it has captured.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2017-10-20 15:14 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-19 20:23 [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Michal Wajdeczko
2017-10-19 20:47 ` ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3) Patchwork
2017-10-20 9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
2017-10-20 11:44 ` Chris Wilson
2017-10-20 12:14 ` Michal Wajdeczko
2017-10-20 12:21 ` Chris Wilson
2017-10-20 12:36 ` Michal Wajdeczko
2017-10-20 12:43 ` Chris Wilson
2017-10-20 12:50 ` Michal Wajdeczko
2017-10-20 12:57 ` Chris Wilson
2017-10-20 14:44 ` Michal Wajdeczko
2017-10-20 14:50 ` Chris Wilson
2017-10-20 14:59 ` Michal Wajdeczko
2017-10-20 15:14 ` Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.