All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
@ 2017-10-19 20:23 Michal Wajdeczko
  2017-10-19 20:47 ` ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3) Patchwork
  2017-10-20  9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
  0 siblings, 2 replies; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-19 20:23 UTC (permalink / raw)
  To: intel-gfx

Include GuC and HuC firmware details in captured error state
to provide additional debug information. To reuse existing
uc firmware pretty printer, introduce new drm-printer variant
that works with our i915_error_state_buf output. Also update
uc firmware pretty printer to accept const input.

v2: don't rely on current caps (Chris)
    dump correct fw info (Michal)
v3: simplify capture of custom paths (Chris)

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 39 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_uc_fw.c    |  2 +-
 drivers/gpu/drm/i915/intel_uc_fw.h    |  2 +-
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3c2649c..4d6519b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -911,6 +911,10 @@ struct i915_gpu_state {
 	struct intel_device_info device_info;
 	struct i915_params params;
 
+	/* uC state */
+	struct intel_uc_fw guc_fw;
+	struct intel_uc_fw huc_fw;
+
 	/* Generic register state */
 	u32 eir;
 	u32 pgtbl_er;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 653fb69..21d78eb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@
 #include <generated/utsrelease.h>
 #include <linux/stop_machine.h>
 #include <linux/zlib.h>
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
 
 static const char *engine_str(int engine)
@@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
 #define err_puts(e, s) i915_error_puts(e, s)
 
+static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
+{
+	i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
+}
+
+static inline struct drm_printer
+i915_error_printer(struct drm_i915_error_state_buf *e)
+{
+	struct drm_printer p = {
+		.printfn = __i915_printfn_error,
+		.arg = e,
+	};
+	return p;
+}
+
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
 
 struct compress {
@@ -592,8 +609,10 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			    const struct i915_gpu_state *error)
 {
+	struct drm_printer p = i915_error_printer(m);
 	struct drm_i915_private *dev_priv = m->i915;
 	struct drm_i915_error_object *obj;
+
 	int i, j;
 
 	if (!error) {
@@ -774,6 +793,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_print_capabilities(m, &error->device_info);
 	err_print_params(m, &error->params);
 
+	if (error->device_info.has_guc) {
+		intel_uc_fw_dump(&error->guc_fw, &p);
+		intel_uc_fw_dump(&error->huc_fw, &p);
+	}
+
 	if (m->bytes == 0 && m->err)
 		return m->err;
 
@@ -870,6 +894,9 @@ void __i915_gpu_state_free(struct kref *error_ref)
 	I915_PARAMS_FOR_EACH(FREE);
 #undef FREE
 
+	kfree(error->guc_fw.path);
+	kfree(error->huc_fw.path);
+
 	kfree(error);
 }
 
@@ -1559,6 +1586,17 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
 	error->pinned_bo = bo;
 }
 
+static void i915_capture_uc_state(struct drm_i915_private *dev_priv,
+				  struct i915_gpu_state *error)
+{
+	error->guc_fw = dev_priv->guc.fw;
+	error->huc_fw = dev_priv->huc.fw;
+
+	/* Make sure to capture custom firmware paths */
+	error->guc_fw.path = kstrdup(error->guc_fw.path, GFP_ATOMIC);
+	error->huc_fw.path = kstrdup(error->huc_fw.path, GFP_ATOMIC);
+}
+
 static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
 					    struct i915_gpu_state *error)
 {
@@ -1710,6 +1748,7 @@ static int capture(void *data)
 	I915_PARAMS_FOR_EACH(DUP);
 #undef DUP
 
+	i915_capture_uc_state(error->i915, error);
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index 973888e..4bc82d3 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -299,7 +299,7 @@ void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
  *
  * Pretty printer for uC firmware.
  */
-void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p)
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
 {
 	drm_printf(p, "%s firmware: %s\n",
 		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index 1329036..5394d9d 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -116,6 +116,6 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
 		       int (*xfer)(struct intel_uc_fw *uc_fw,
 				   struct i915_vma *vma));
 void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
-void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p);
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
 
 #endif
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3)
  2017-10-19 20:23 [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Michal Wajdeczko
@ 2017-10-19 20:47 ` Patchwork
  2017-10-20  9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
  1 sibling, 0 replies; 14+ messages in thread
From: Patchwork @ 2017-10-19 20:47 UTC (permalink / raw)
  To: Michal Wajdeczko; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Add Guc/HuC firmware details to error state (rev3)
URL   : https://patchwork.freedesktop.org/series/32293/
State : failure

== Summary ==

Series 32293v3 drm/i915: Add Guc/HuC firmware details to error state
https://patchwork.freedesktop.org/api/1.0/series/32293/revisions/3/mbox/

Test chamelium:
        Subgroup dp-hpd-fast:
                skip       -> INCOMPLETE (fi-bdw-gvtdvm) fdo#102332
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                incomplete -> PASS       (fi-skl-6700hq)
                pass       -> INCOMPLETE (fi-skl-6770hq)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-byt-n2820) fdo#101705
Test drv_module_reload:
        Subgroup basic-reload-inject:
                dmesg-warn -> INCOMPLETE (fi-cfl-s) fdo#103206

fdo#102332 https://bugs.freedesktop.org/show_bug.cgi?id=102332
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705
fdo#103206 https://bugs.freedesktop.org/show_bug.cgi?id=103206

fi-bdw-5557u     total:289  pass:268  dwarn:0   dfail:0   fail:0   skip:21  time:439s
fi-bdw-gvtdvm    total:59   pass:0    dwarn:0   dfail:0   fail:0   skip:0  
fi-blb-e6850     total:289  pass:223  dwarn:1   dfail:0   fail:0   skip:65  time:374s
fi-bsw-n3050     total:289  pass:243  dwarn:0   dfail:0   fail:0   skip:46  time:525s
fi-bwr-2160      total:289  pass:183  dwarn:0   dfail:0   fail:0   skip:106 time:263s
fi-bxt-dsi       total:289  pass:259  dwarn:0   dfail:0   fail:0   skip:30  time:500s
fi-bxt-j4205     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:497s
fi-byt-j1900     total:289  pass:253  dwarn:1   dfail:0   fail:0   skip:35  time:494s
fi-byt-n2820     total:289  pass:249  dwarn:1   dfail:0   fail:0   skip:39  time:482s
fi-cfl-s         total:288  pass:253  dwarn:3   dfail:0   fail:0   skip:31 
fi-elk-e7500     total:289  pass:229  dwarn:0   dfail:0   fail:0   skip:60  time:415s
fi-gdg-551       total:289  pass:178  dwarn:1   dfail:0   fail:1   skip:109 time:250s
fi-glk-1         total:289  pass:261  dwarn:0   dfail:0   fail:0   skip:28  time:581s
fi-hsw-4770      total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:449s
fi-hsw-4770r     total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:426s
fi-ilk-650       total:289  pass:228  dwarn:0   dfail:0   fail:0   skip:61  time:434s
fi-ivb-3520m     total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:493s
fi-ivb-3770      total:289  pass:260  dwarn:0   dfail:0   fail:0   skip:29  time:462s
fi-kbl-7500u     total:289  pass:264  dwarn:1   dfail:0   fail:0   skip:24  time:487s
fi-kbl-7560u     total:289  pass:270  dwarn:0   dfail:0   fail:0   skip:19  time:570s
fi-kbl-7567u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:473s
fi-kbl-r         total:289  pass:262  dwarn:0   dfail:0   fail:0   skip:27  time:582s
fi-pnv-d510      total:289  pass:222  dwarn:1   dfail:0   fail:0   skip:66  time:547s
fi-skl-6260u     total:289  pass:269  dwarn:0   dfail:0   fail:0   skip:20  time:452s
fi-skl-6700hq    total:289  pass:263  dwarn:0   dfail:0   fail:0   skip:26  time:648s
fi-skl-6700k     total:289  pass:265  dwarn:0   dfail:0   fail:0   skip:24  time:521s
fi-skl-6770hq    total:219  pass:206  dwarn:0   dfail:0   fail:0   skip:12 
fi-skl-gvtdvm    total:289  pass:266  dwarn:0   dfail:0   fail:0   skip:23  time:459s
fi-snb-2520m     total:289  pass:250  dwarn:0   dfail:0   fail:0   skip:39  time:567s
fi-snb-2600      total:289  pass:249  dwarn:0   dfail:0   fail:0   skip:40  time:422s

87b113da0ae43b97650ddcec6c05ef69469e2107 drm-tip: 2017y-10m-19d-18h-14m-41s UTC integration manifest
e08b27eb1f11 drm/i915: Add Guc/HuC firmware details to error state

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_6114/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-19 20:23 [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Michal Wajdeczko
  2017-10-19 20:47 ` ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3) Patchwork
@ 2017-10-20  9:41 ` Joonas Lahtinen
  2017-10-20 11:44   ` Chris Wilson
  2017-10-20 14:44   ` Michal Wajdeczko
  1 sibling, 2 replies; 14+ messages in thread
From: Joonas Lahtinen @ 2017-10-20  9:41 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx

On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> Include GuC and HuC firmware details in captured error state
> to provide additional debug information. To reuse existing
> uc firmware pretty printer, introduce new drm-printer variant
> that works with our i915_error_state_buf output. Also update
> uc firmware pretty printer to accept const input.
> 
> v2: don't rely on current caps (Chris)
>     dump correct fw info (Michal)
> v3: simplify capture of custom paths (Chris)
> 
> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

A few comments below.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas

> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  	err_print_capabilities(m, &error->device_info);
>  	err_print_params(m, &error->params);
>  
> +	if (error->device_info.has_guc) {
> +		intel_uc_fw_dump(&error->guc_fw, &p);
> +		intel_uc_fw_dump(&error->huc_fw, &p);
> +	}

I might use "error->{g,h}uc_fw.path" as the condition, for both
individually. We will have DMC here in the future, too.

> +static void i915_capture_uc_state(struct drm_i915_private *dev_priv,
> +				  struct i915_gpu_state *error)
> +{
> +	error->guc_fw = dev_priv->guc.fw;
> +	error->huc_fw = dev_priv->huc.fw;
> +
> +	/* Make sure to capture custom firmware paths */

This is again a "what" comment, not "why". And they're not necessarily
the custom ones. The "why" is generic to all error capture funcs.

> +	error->guc_fw.path = kstrdup(error->guc_fw.path, GFP_ATOMIC);
> +	error->huc_fw.path = kstrdup(error->huc_fw.path, GFP_ATOMIC);
> +}
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20  9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
@ 2017-10-20 11:44   ` Chris Wilson
  2017-10-20 12:14     ` Michal Wajdeczko
  2017-10-20 14:44   ` Michal Wajdeczko
  1 sibling, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 11:44 UTC (permalink / raw)
  To: Joonas Lahtinen, Michal Wajdeczko, intel-gfx

Quoting Joonas Lahtinen (2017-10-20 10:41:57)
> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> >       err_print_capabilities(m, &error->device_info);
> >       err_print_params(m, &error->params);
> >  
> > +     if (error->device_info.has_guc) {
> > +             intel_uc_fw_dump(&error->guc_fw, &p);
> > +             intel_uc_fw_dump(&error->huc_fw, &p);
> > +     }
> 
> I might use "error->{g,h}uc_fw.path" as the condition, for both
> individually. We will have DMC here in the future, too.

That's the type of predicate I was looking for. If we can tell when the
fw has been loaded just by looking at the uc_fw struct, please do so.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 11:44   ` Chris Wilson
@ 2017-10-20 12:14     ` Michal Wajdeczko
  2017-10-20 12:21       ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 12:14 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx, Chris Wilson

On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Joonas Lahtinen (2017-10-20 10:41:57)
>> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct  
>> drm_i915_error_state_buf *m,
>> >       err_print_capabilities(m, &error->device_info);
>> >       err_print_params(m, &error->params);
>> >
>> > +     if (error->device_info.has_guc) {
>> > +             intel_uc_fw_dump(&error->guc_fw, &p);
>> > +             intel_uc_fw_dump(&error->huc_fw, &p);
>> > +     }
>>
>> I might use "error->{g,h}uc_fw.path" as the condition, for both
>> individually. We will have DMC here in the future, too.
>
> That's the type of predicate I was looking for. If we can tell when the
> fw has been loaded just by looking at the uc_fw struct, please do so.

If you want to include uc fw info only when fw was loaded at capture  
moment,
then we can look directly at its "load_status" field:

	if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)

But what if fw was loaded earlier, before error capture is started?
Don't we want to capture whole driver configuration?

Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 12:14     ` Michal Wajdeczko
@ 2017-10-20 12:21       ` Chris Wilson
  2017-10-20 12:36         ` Michal Wajdeczko
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 12:21 UTC (permalink / raw)
  To: Michal Wajdeczko, Joonas Lahtinen, intel-gfx

Quoting Michal Wajdeczko (2017-10-20 13:14:45)
> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson  
> <chris@chris-wilson.co.uk> wrote:
> 
> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct  
> >> drm_i915_error_state_buf *m,
> >> >       err_print_capabilities(m, &error->device_info);
> >> >       err_print_params(m, &error->params);
> >> >
> >> > +     if (error->device_info.has_guc) {
> >> > +             intel_uc_fw_dump(&error->guc_fw, &p);
> >> > +             intel_uc_fw_dump(&error->huc_fw, &p);
> >> > +     }
> >>
> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
> >> individually. We will have DMC here in the future, too.
> >
> > That's the type of predicate I was looking for. If we can tell when the
> > fw has been loaded just by looking at the uc_fw struct, please do so.
> 
> If you want to include uc fw info only when fw was loaded at capture  
> moment,
> then we can look directly at its "load_status" field:
> 
>         if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> 
> But what if fw was loaded earlier, before error capture is started?
> Don't we want to capture whole driver configuration?

The goal is to know what the driver/hw was doing at the time of the
capture (which we presume is still identical to the hang).

At this point, we are just asking ourselves what is the most unambiguous
way of printing valid data. We already show has_guc in the output, so it
boils down to what was the uc_fw state. It doesn't need to be
load_status == SUCCESS, as we should show a failed attempt to upload the
fw as well.

So intel_uc_fw.size?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 12:21       ` Chris Wilson
@ 2017-10-20 12:36         ` Michal Wajdeczko
  2017-10-20 12:43           ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 12:36 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx, Chris Wilson

On Fri, 20 Oct 2017 14:21:16 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Michal Wajdeczko (2017-10-20 13:14:45)
>> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
>> <chris@chris-wilson.co.uk> wrote:
>>
>> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
>> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> >> drm_i915_error_state_buf *m,
>> >> >       err_print_capabilities(m, &error->device_info);
>> >> >       err_print_params(m, &error->params);
>> >> >
>> >> > +     if (error->device_info.has_guc) {
>> >> > +             intel_uc_fw_dump(&error->guc_fw, &p);
>> >> > +             intel_uc_fw_dump(&error->huc_fw, &p);
>> >> > +     }
>> >>
>> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
>> >> individually. We will have DMC here in the future, too.
>> >
>> > That's the type of predicate I was looking for. If we can tell when  
>> the
>> > fw has been loaded just by looking at the uc_fw struct, please do so.
>>
>> If you want to include uc fw info only when fw was loaded at capture
>> moment,
>> then we can look directly at its "load_status" field:
>>
>>         if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
>>
>> But what if fw was loaded earlier, before error capture is started?
>> Don't we want to capture whole driver configuration?
>
> The goal is to know what the driver/hw was doing at the time of the
> capture (which we presume is still identical to the hang).
>
> At this point, we are just asking ourselves what is the most unambiguous
> way of printing valid data. We already show has_guc in the output, so it
> boils down to what was the uc_fw state. It doesn't need to be
> load_status == SUCCESS, as we should show a failed attempt to upload the
> fw as well.
>
> So intel_uc_fw.size?

But then in case of zero size fw you will not get info about load failure  
;)

What about leaving here "if (error->device_info.has_guc)" as main guard
for both GuC and HuC fw dumps (note that upcoming series make it explicit
that HuC is always in pair with GuC) but then also add "if (uc_fw->path)"
condition to the uc fw pretty printer to stop printing fw details if no
path was specified (all fields will be void anyway). Then we may get:

has_guc: yes
...
GuC firmware: (null)
HuC firmware: (null)


Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 12:36         ` Michal Wajdeczko
@ 2017-10-20 12:43           ` Chris Wilson
  2017-10-20 12:50             ` Michal Wajdeczko
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 12:43 UTC (permalink / raw)
  To: Michal Wajdeczko, Joonas Lahtinen, intel-gfx

Quoting Michal Wajdeczko (2017-10-20 13:36:59)
> On Fri, 20 Oct 2017 14:21:16 +0200, Chris Wilson  
> <chris@chris-wilson.co.uk> wrote:
> 
> > Quoting Michal Wajdeczko (2017-10-20 13:14:45)
> >> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
> >> <chris@chris-wilson.co.uk> wrote:
> >>
> >> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
> >> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
> >> >> drm_i915_error_state_buf *m,
> >> >> >       err_print_capabilities(m, &error->device_info);
> >> >> >       err_print_params(m, &error->params);
> >> >> >
> >> >> > +     if (error->device_info.has_guc) {
> >> >> > +             intel_uc_fw_dump(&error->guc_fw, &p);
> >> >> > +             intel_uc_fw_dump(&error->huc_fw, &p);
> >> >> > +     }
> >> >>
> >> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
> >> >> individually. We will have DMC here in the future, too.
> >> >
> >> > That's the type of predicate I was looking for. If we can tell when  
> >> the
> >> > fw has been loaded just by looking at the uc_fw struct, please do so.
> >>
> >> If you want to include uc fw info only when fw was loaded at capture
> >> moment,
> >> then we can look directly at its "load_status" field:
> >>
> >>         if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
> >>
> >> But what if fw was loaded earlier, before error capture is started?
> >> Don't we want to capture whole driver configuration?
> >
> > The goal is to know what the driver/hw was doing at the time of the
> > capture (which we presume is still identical to the hang).
> >
> > At this point, we are just asking ourselves what is the most unambiguous
> > way of printing valid data. We already show has_guc in the output, so it
> > boils down to what was the uc_fw state. It doesn't need to be
> > load_status == SUCCESS, as we should show a failed attempt to upload the
> > fw as well.
> >
> > So intel_uc_fw.size?
> 
> But then in case of zero size fw you will not get info about load failure  
> ;)

Heh, I would just go back and make sure we had a clear well of deducing
when fw was being utilised. Maybe 

	if (uc_fw.load_status != INTEL_UC_FIRMWARE_NONE)

?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 12:43           ` Chris Wilson
@ 2017-10-20 12:50             ` Michal Wajdeczko
  2017-10-20 12:57               ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 12:50 UTC (permalink / raw)
  To: Joonas Lahtinen, intel-gfx, Chris Wilson

On Fri, 20 Oct 2017 14:43:46 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Michal Wajdeczko (2017-10-20 13:36:59)
>> On Fri, 20 Oct 2017 14:21:16 +0200, Chris Wilson
>> <chris@chris-wilson.co.uk> wrote:
>>
>> > Quoting Michal Wajdeczko (2017-10-20 13:14:45)
>> >> On Fri, 20 Oct 2017 13:44:04 +0200, Chris Wilson
>> >> <chris@chris-wilson.co.uk> wrote:
>> >>
>> >> > Quoting Joonas Lahtinen (2017-10-20 10:41:57)
>> >> >> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> >> >> > @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> >> >> drm_i915_error_state_buf *m,
>> >> >> >       err_print_capabilities(m, &error->device_info);
>> >> >> >       err_print_params(m, &error->params);
>> >> >> >
>> >> >> > +     if (error->device_info.has_guc) {
>> >> >> > +             intel_uc_fw_dump(&error->guc_fw, &p);
>> >> >> > +             intel_uc_fw_dump(&error->huc_fw, &p);
>> >> >> > +     }
>> >> >>
>> >> >> I might use "error->{g,h}uc_fw.path" as the condition, for both
>> >> >> individually. We will have DMC here in the future, too.
>> >> >
>> >> > That's the type of predicate I was looking for. If we can tell when
>> >> the
>> >> > fw has been loaded just by looking at the uc_fw struct, please do  
>> so.
>> >>
>> >> If you want to include uc fw info only when fw was loaded at capture
>> >> moment,
>> >> then we can look directly at its "load_status" field:
>> >>
>> >>         if (error->guc_fw.load_status == INTEL_UC_FIRMWARE_SUCCESS)
>> >>
>> >> But what if fw was loaded earlier, before error capture is started?
>> >> Don't we want to capture whole driver configuration?
>> >
>> > The goal is to know what the driver/hw was doing at the time of the
>> > capture (which we presume is still identical to the hang).
>> >
>> > At this point, we are just asking ourselves what is the most  
>> unambiguous
>> > way of printing valid data. We already show has_guc in the output, so  
>> it
>> > boils down to what was the uc_fw state. It doesn't need to be
>> > load_status == SUCCESS, as we should show a failed attempt to upload  
>> the
>> > fw as well.
>> >
>> > So intel_uc_fw.size?
>>
>> But then in case of zero size fw you will not get info about load  
>> failure
>> ;)
>
> Heh, I would just go back and make sure we had a clear well of deducing
> when fw was being utilised. Maybe
>
> 	if (uc_fw.load_status != INTEL_UC_FIRMWARE_NONE)
>

If we don't care about earlier fw fetch attempts/errors, then ok.

Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 12:50             ` Michal Wajdeczko
@ 2017-10-20 12:57               ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 12:57 UTC (permalink / raw)
  To: Michal Wajdeczko, Joonas Lahtinen, intel-gfx

Quoting Michal Wajdeczko (2017-10-20 13:50:23)
> On Fri, 20 Oct 2017 14:43:46 +0200, Chris Wilson  
> <chris@chris-wilson.co.uk> wrote:
> > Heh, I would just go back and make sure we had a clear well of deducing
> > when fw was being utilised. Maybe
> >
> >       if (uc_fw.load_status != INTEL_UC_FIRMWARE_NONE)
> >
> 
> If we don't care about earlier fw fetch attempts/errors, then ok.

Just since the last reset is about all we aim for. It's a game of trying
to work out what information will be required to debug future hangs. Too
much and it's a needle-in-a-haystack-problem, too little and we have the
round-trip latency in getting a new kernel to where the problem is seen.
And then for the next few years being frustrated at seeing hangs with the
old error state.

I defer what information is required for debugging in the future to
those already attempting it; but I do know that recording what firmware
and what status we left the uc in will always be useful. :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20  9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
  2017-10-20 11:44   ` Chris Wilson
@ 2017-10-20 14:44   ` Michal Wajdeczko
  2017-10-20 14:50     ` Chris Wilson
  1 sibling, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 14:44 UTC (permalink / raw)
  To: intel-gfx, Joonas Lahtinen

On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen  
<joonas.lahtinen@linux.intel.com> wrote:

> On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> Include GuC and HuC firmware details in captured error state
>> to provide additional debug information. To reuse existing
>> uc firmware pretty printer, introduce new drm-printer variant
>> that works with our i915_error_state_buf output. Also update
>> uc firmware pretty printer to accept const input.
>>
>> v2: don't rely on current caps (Chris)
>>     dump correct fw info (Michal)
>> v3: simplify capture of custom paths (Chris)
>>
>> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> A few comments below.
>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> Regards, Joonas
>
>> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct  
>> drm_i915_error_state_buf *m,
>>  	err_print_capabilities(m, &error->device_info);
>>  	err_print_params(m, &error->params);
>>
>> +	if (error->device_info.has_guc) {
>> +		intel_uc_fw_dump(&error->guc_fw, &p);
>> +		intel_uc_fw_dump(&error->huc_fw, &p);
>> +	}
>
> I might use "error->{g,h}uc_fw.path" as the condition, for both

If we use "uc_fw->path" as condition then we have to assume that
lack of uc firmware info is for this reason, and not from corrupted
or incomplete or legacy format error dump ;)

I would prefer to stay with "has_guc" condition here, and trim
output from uc fw printer if path is not set. This way we will
cover all possible scenario (no guc = no fw, no fw = no fw info)

> individually. We will have DMC here in the future, too.

Hmm, while not here, info about DMC fw is already included in
error dump. But in their case reported DMC fw state (loaded flag,
version) is taken directly from dev_priv instead of captured error
state.

Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 14:44   ` Michal Wajdeczko
@ 2017-10-20 14:50     ` Chris Wilson
  2017-10-20 14:59       ` Michal Wajdeczko
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 14:50 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx, Joonas Lahtinen

Quoting Michal Wajdeczko (2017-10-20 15:44:02)
> On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen  
> <joonas.lahtinen@linux.intel.com> wrote:
> 
> > On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> Include GuC and HuC firmware details in captured error state
> >> to provide additional debug information. To reuse existing
> >> uc firmware pretty printer, introduce new drm-printer variant
> >> that works with our i915_error_state_buf output. Also update
> >> uc firmware pretty printer to accept const input.
> >>
> >> v2: don't rely on current caps (Chris)
> >>     dump correct fw info (Michal)
> >> v3: simplify capture of custom paths (Chris)
> >>
> >> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >
> > A few comments below.
> >
> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >
> > Regards, Joonas
> >
> >> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct  
> >> drm_i915_error_state_buf *m,
> >>      err_print_capabilities(m, &error->device_info);
> >>      err_print_params(m, &error->params);
> >>
> >> +    if (error->device_info.has_guc) {
> >> +            intel_uc_fw_dump(&error->guc_fw, &p);
> >> +            intel_uc_fw_dump(&error->huc_fw, &p);
> >> +    }
> >
> > I might use "error->{g,h}uc_fw.path" as the condition, for both
> 
> If we use "uc_fw->path" as condition then we have to assume that
> lack of uc firmware info is for this reason, and not from corrupted
> or incomplete or legacy format error dump ;)
> 
> I would prefer to stay with "has_guc" condition here, and trim
> output from uc fw printer if path is not set. This way we will
> cover all possible scenario (no guc = no fw, no fw = no fw info)

I still do not understand the insistence on has_guc. no fw is no fw, no
guc is no guc.
 
> > individually. We will have DMC here in the future, too.
> 
> Hmm, while not here, info about DMC fw is already included in
> error dump. But in their case reported DMC fw state (loaded flag,
> version) is taken directly from dev_priv instead of captured error
> state.

I know. You are not to copy that mistake! Also if you should happen to
feel inclined to rectify the heinous actions, preferably by removing
the dmc entirely, please do so.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 14:50     ` Chris Wilson
@ 2017-10-20 14:59       ` Michal Wajdeczko
  2017-10-20 15:14         ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Michal Wajdeczko @ 2017-10-20 14:59 UTC (permalink / raw)
  To: intel-gfx, Joonas Lahtinen, Chris Wilson

On Fri, 20 Oct 2017 16:50:10 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Michal Wajdeczko (2017-10-20 15:44:02)
>> On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen
>> <joonas.lahtinen@linux.intel.com> wrote:
>>
>> > On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
>> >> Include GuC and HuC firmware details in captured error state
>> >> to provide additional debug information. To reuse existing
>> >> uc firmware pretty printer, introduce new drm-printer variant
>> >> that works with our i915_error_state_buf output. Also update
>> >> uc firmware pretty printer to accept const input.
>> >>
>> >> v2: don't rely on current caps (Chris)
>> >>     dump correct fw info (Michal)
>> >> v3: simplify capture of custom paths (Chris)
>> >>
>> >> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
>> >> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> >
>> > A few comments below.
>> >
>> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> >
>> > Regards, Joonas
>> >
>> >> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
>> >> drm_i915_error_state_buf *m,
>> >>      err_print_capabilities(m, &error->device_info);
>> >>      err_print_params(m, &error->params);
>> >>
>> >> +    if (error->device_info.has_guc) {
>> >> +            intel_uc_fw_dump(&error->guc_fw, &p);
>> >> +            intel_uc_fw_dump(&error->huc_fw, &p);
>> >> +    }
>> >
>> > I might use "error->{g,h}uc_fw.path" as the condition, for both
>>
>> If we use "uc_fw->path" as condition then we have to assume that
>> lack of uc firmware info is for this reason, and not from corrupted
>> or incomplete or legacy format error dump ;)
>>
>> I would prefer to stay with "has_guc" condition here, and trim
>> output from uc fw printer if path is not set. This way we will
>> cover all possible scenario (no guc = no fw, no fw = no fw info)
>
> I still do not understand the insistence on has_guc. no fw is no fw, no
> guc is no guc.

I just wanted to cover the case where we have guc but without fw
(explicitly expressed as null path) while skipping fw info for
platforms without guc. No more guesses. No unwanted noise.

Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state
  2017-10-20 14:59       ` Michal Wajdeczko
@ 2017-10-20 15:14         ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-10-20 15:14 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx, Joonas Lahtinen

Quoting Michal Wajdeczko (2017-10-20 15:59:41)
> On Fri, 20 Oct 2017 16:50:10 +0200, Chris Wilson  
> <chris@chris-wilson.co.uk> wrote:
> 
> > Quoting Michal Wajdeczko (2017-10-20 15:44:02)
> >> On Fri, 20 Oct 2017 11:41:57 +0200, Joonas Lahtinen
> >> <joonas.lahtinen@linux.intel.com> wrote:
> >>
> >> > On Thu, 2017-10-19 at 20:23 +0000, Michal Wajdeczko wrote:
> >> >> Include GuC and HuC firmware details in captured error state
> >> >> to provide additional debug information. To reuse existing
> >> >> uc firmware pretty printer, introduce new drm-printer variant
> >> >> that works with our i915_error_state_buf output. Also update
> >> >> uc firmware pretty printer to accept const input.
> >> >>
> >> >> v2: don't rely on current caps (Chris)
> >> >>     dump correct fw info (Michal)
> >> >> v3: simplify capture of custom paths (Chris)
> >> >>
> >> >> Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> >> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> >> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> >
> >> > A few comments below.
> >> >
> >> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> >
> >> > Regards, Joonas
> >> >
> >> >> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct
> >> >> drm_i915_error_state_buf *m,
> >> >>      err_print_capabilities(m, &error->device_info);
> >> >>      err_print_params(m, &error->params);
> >> >>
> >> >> +    if (error->device_info.has_guc) {
> >> >> +            intel_uc_fw_dump(&error->guc_fw, &p);
> >> >> +            intel_uc_fw_dump(&error->huc_fw, &p);
> >> >> +    }
> >> >
> >> > I might use "error->{g,h}uc_fw.path" as the condition, for both
> >>
> >> If we use "uc_fw->path" as condition then we have to assume that
> >> lack of uc firmware info is for this reason, and not from corrupted
> >> or incomplete or legacy format error dump ;)
> >>
> >> I would prefer to stay with "has_guc" condition here, and trim
> >> output from uc fw printer if path is not set. This way we will
> >> cover all possible scenario (no guc = no fw, no fw = no fw info)
> >
> > I still do not understand the insistence on has_guc. no fw is no fw, no
> > guc is no guc.
> 
> I just wanted to cover the case where we have guc but without fw
> (explicitly expressed as null path) while skipping fw info for
> platforms without guc. No more guesses. No unwanted noise.

The first makes sense, but the second? We either used the struct uc_fw
or we didn't. If we did and didn't have guc, that raises a question or
two -- as it should never be the case, but for the error capture it
doesn't have to know, it just prints what it has captured.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-10-20 15:14 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-19 20:23 [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Michal Wajdeczko
2017-10-19 20:47 ` ✗ Fi.CI.BAT: failure for drm/i915: Add Guc/HuC firmware details to error state (rev3) Patchwork
2017-10-20  9:41 ` [PATCH v3] drm/i915: Add Guc/HuC firmware details to error state Joonas Lahtinen
2017-10-20 11:44   ` Chris Wilson
2017-10-20 12:14     ` Michal Wajdeczko
2017-10-20 12:21       ` Chris Wilson
2017-10-20 12:36         ` Michal Wajdeczko
2017-10-20 12:43           ` Chris Wilson
2017-10-20 12:50             ` Michal Wajdeczko
2017-10-20 12:57               ` Chris Wilson
2017-10-20 14:44   ` Michal Wajdeczko
2017-10-20 14:50     ` Chris Wilson
2017-10-20 14:59       ` Michal Wajdeczko
2017-10-20 15:14         ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.