All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure
@ 2018-02-20 22:57 Michal Wajdeczko
  2018-02-20 22:57 ` [PATCH v2 2/2] HAX: Enable GuC for CI Michal Wajdeczko
                   ` (5 more replies)
  0 siblings, 6 replies; 10+ messages in thread
From: Michal Wajdeczko @ 2018-02-20 22:57 UTC (permalink / raw)
  To: intel-gfx

Since commit 6ca9a2beb54a ("drm/i915: Unwind i915_gem_init() failure")
we believed that we correctly handle all errors encountered during
GuC initialization, including special one that indicates request to
run driver with disabled GPU submission (-EIO).

Unfortunately since commit 121981fafe69 ("drm/i915/guc: Combine
enable_guc_loading|submission modparams") we stopped using that
error code to avoid unwanted fallback to execlist submission mode.

In result any GuC initialization failure was treated as non-recoverable
error leading to driver load abort, so we could not even read related
GuC error log to investigate cause of the problem.

Fix that by always returning -EIO on uC hardware related failure.

v2: don't allow -EIO from uc_init
    don't call uc_fini[_misc] on -EIO
    mark guc fw as failed on hw init failure
    prepare uc_fini_hw to run after earlier -EIO

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c    | 13 ++++++++-----
 drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
 drivers/gpu/drm/i915/intel_uc.c    | 13 +++++++++----
 drivers/gpu/drm/i915/intel_uc_fw.h |  5 +++++
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 631a2db..80f23a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5324,8 +5324,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_init_gt_powersave(dev_priv);
 
 	ret = intel_uc_init(dev_priv);
-	if (ret)
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
 		goto err_pm;
+	}
 
 	ret = i915_gem_init_hw(dev_priv);
 	if (ret)
@@ -5372,7 +5374,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	i915_gem_contexts_lost(dev_priv);
 	intel_uc_fini_hw(dev_priv);
 err_uc_init:
-	intel_uc_fini(dev_priv);
+	if (ret != -EIO)
+		intel_uc_fini(dev_priv);
 err_pm:
 	if (ret != -EIO) {
 		intel_cleanup_gt_powersave(dev_priv);
@@ -5386,10 +5389,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_uc_fini_misc(dev_priv);
-
-	if (ret != -EIO)
+	if (ret != -EIO) {
+		intel_uc_fini_misc(dev_priv);
 		i915_gem_cleanup_userptr(dev_priv);
+	}
 
 	if (ret == -EIO) {
 		/*
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 52856a9..512ff7b 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -100,6 +100,11 @@ static inline void intel_guc_notify(struct intel_guc *guc)
 	guc->notify(guc);
 }
 
+static inline bool intel_guc_is_loaded(struct intel_guc *guc)
+{
+	return intel_uc_fw_is_loaded(&guc->fw);
+}
+
 /*
  * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP),
  * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 9f1bac6..75d0eb9 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -421,11 +421,13 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	 * Note that there is no fallback as either user explicitly asked for
 	 * the GuC or driver default option was to run with the GuC enabled.
 	 */
-	if (GEM_WARN_ON(ret == -EIO))
-		ret = -EINVAL;
-
 	dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret);
-	return ret;
+
+	/* Mark GuC firmware as failed to avoid redundant clean-up */
+	guc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
+
+	/* We want to disable GPU submission but keep KMS alive */
+	return -EIO;
 }
 
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
@@ -437,6 +439,9 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 
+	if (!intel_guc_is_loaded(guc))
+		return;
+
 	if (USES_GUC_SUBMISSION(dev_priv))
 		intel_guc_submission_disable(guc);
 
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index d5fd460..0e3b237 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -115,6 +115,11 @@ static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
 	return uc_fw->path != NULL;
 }
 
+static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
+{
+	return uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS;
+}
+
 void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		       struct intel_uc_fw *uc_fw);
 int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/2] HAX: Enable GuC for CI
  2018-02-20 22:57 [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure Michal Wajdeczko
@ 2018-02-20 22:57 ` Michal Wajdeczko
  2018-02-20 23:36 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure Patchwork
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Michal Wajdeczko @ 2018-02-20 22:57 UTC (permalink / raw)
  To: intel-gfx

v2: except running with HYPERVISOR

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/i915_params.h | 2 +-
 drivers/gpu/drm/i915/intel_uc.c    | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 430f5f9..3deae1e 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -47,7 +47,7 @@
 	param(int, disable_power_well, -1) \
 	param(int, enable_ips, 1) \
 	param(int, invert_brightness, 0) \
-	param(int, enable_guc, 0) \
+	param(int, enable_guc, -1) \
 	param(int, guc_log_level, 0) \
 	param(char *, guc_firmware_path, NULL) \
 	param(char *, huc_firmware_path, NULL) \
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 75d0eb9..c577193 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -63,6 +63,8 @@ static int __get_platform_enable_guc(struct drm_i915_private *dev_priv)
 		enable_guc |= ENABLE_GUC_LOAD_HUC;
 
 	/* Any platform specific fine-tuning can be done here */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		enable_guc = 0;
 
 	return enable_guc;
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-20 22:57 [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure Michal Wajdeczko
  2018-02-20 22:57 ` [PATCH v2 2/2] HAX: Enable GuC for CI Michal Wajdeczko
@ 2018-02-20 23:36 ` Patchwork
  2018-02-21  7:18 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2018-02-20 23:36 UTC (permalink / raw)
  To: Michal Wajdeczko; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure
URL   : https://patchwork.freedesktop.org/series/38640/
State : success

== Summary ==

Series 38640v1 series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure
https://patchwork.freedesktop.org/api/1.0/series/38640/revisions/1/mbox/

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:417s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:425s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:371s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:484s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:284s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:477s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:481s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:464s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:453s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:564s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:414s
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:281s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:502s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:385s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:406s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:452s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:417s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:448s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:485s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:446s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:492s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:587s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:423s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:501s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:516s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:484s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:484s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:403s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:430s
fi-snb-2520m     total:245  pass:211  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:389s

f727568c3b37c1349f635efcc67d64ac3cf77108 drm-tip: 2018y-02m-20d-20h-39m-03s UTC integration manifest
fbd8098eb01b HAX: Enable GuC for CI
23fcb79df869 drm/i915/guc: Use correct error code for GuC initialization failure

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8091/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-20 22:57 [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure Michal Wajdeczko
  2018-02-20 22:57 ` [PATCH v2 2/2] HAX: Enable GuC for CI Michal Wajdeczko
  2018-02-20 23:36 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure Patchwork
@ 2018-02-21  7:18 ` Patchwork
  2018-02-21  8:08 ` [PATCH v2 1/2] " Sagar Arun Kamble
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2018-02-21  7:18 UTC (permalink / raw)
  To: Michal Wajdeczko; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure
URL   : https://patchwork.freedesktop.org/series/38640/
State : failure

== Summary ==

Test kms_vblank:
        Subgroup pipe-c-wait-idle:
                dmesg-warn -> PASS       (shard-hsw) fdo#102614
        Subgroup pipe-b-accuracy-idle:
                pass       -> FAIL       (shard-hsw) fdo#102583
Test drv_missed_irq:
                pass       -> SKIP       (shard-apl)
Test perf:
        Subgroup gen8-unprivileged-single-ctx-counters:
                pass       -> FAIL       (shard-apl)
Test kms_flip:
        Subgroup modeset-vs-vblank-race:
                pass       -> FAIL       (shard-hsw) fdo#103060
Test gem_exec_schedule:
        Subgroup smoketest-all:
                pass       -> FAIL       (shard-apl)
        Subgroup smoketest-render:
                pass       -> FAIL       (shard-apl)
Test kms_cursor_crc:
        Subgroup cursor-256x256-suspend:
                incomplete -> PASS       (shard-hsw) fdo#103375
Test drv_selftest:
        Subgroup live_guc:
                pass       -> DMESG-WARN (shard-apl)
Test kms_chv_cursor_fail:
        Subgroup pipe-b-128x128-bottom-edge:
                pass       -> DMESG-WARN (shard-snb)

fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
fdo#102583 https://bugs.freedesktop.org/show_bug.cgi?id=102583
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375

shard-apl        total:3367 pass:1753 dwarn:2   dfail:0   fail:18  skip:1593 time:12258s
shard-hsw        total:3429 pass:1757 dwarn:1   dfail:0   fail:5   skip:1665 time:11555s
shard-snb        total:3429 pass:1349 dwarn:2   dfail:0   fail:2   skip:2076 time:6574s
Blacklisted hosts:
shard-kbl        total:3192 pass:1736 dwarn:2   dfail:1   fail:24  skip:1424 time:8272s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8091/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-20 22:57 [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure Michal Wajdeczko
                   ` (2 preceding siblings ...)
  2018-02-21  7:18 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-02-21  8:08 ` Sagar Arun Kamble
  2018-02-21 12:20   ` Michal Wajdeczko
  2018-02-21  8:27 ` Chris Wilson
  2018-02-21 16:54 ` Daniele Ceraolo Spurio
  5 siblings, 1 reply; 10+ messages in thread
From: Sagar Arun Kamble @ 2018-02-21  8:08 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx



On 2/21/2018 4:27 AM, Michal Wajdeczko wrote:
> Since commit 6ca9a2beb54a ("drm/i915: Unwind i915_gem_init() failure")
> we believed that we correctly handle all errors encountered during
> GuC initialization, including special one that indicates request to
> run driver with disabled GPU submission (-EIO).
>
> Unfortunately since commit 121981fafe69 ("drm/i915/guc: Combine
> enable_guc_loading|submission modparams") we stopped using that
> error code to avoid unwanted fallback to execlist submission mode.
>
> In result any GuC initialization failure was treated as non-recoverable
> error leading to driver load abort, so we could not even read related
> GuC error log to investigate cause of the problem.
>
> Fix that by always returning -EIO on uC hardware related failure.
>
> v2: don't allow -EIO from uc_init
>      don't call uc_fini[_misc] on -EIO
>      mark guc fw as failed on hw init failure
>      prepare uc_fini_hw to run after earlier -EIO
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c    | 13 ++++++++-----
>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>   drivers/gpu/drm/i915/intel_uc.c    | 13 +++++++++----
>   drivers/gpu/drm/i915/intel_uc_fw.h |  5 +++++
>   4 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 631a2db..80f23a8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5324,8 +5324,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   	intel_init_gt_powersave(dev_priv);
>   
>   	ret = intel_uc_init(dev_priv);
> -	if (ret)
> +	if (ret) {
> +		GEM_BUG_ON(ret == -EIO);
>   		goto err_pm;
> +	}
>   
>   	ret = i915_gem_init_hw(dev_priv);
>   	if (ret)
> @@ -5372,7 +5374,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   	i915_gem_contexts_lost(dev_priv);
>   	intel_uc_fini_hw(dev_priv);
This uc_fini_hw should also be not called on -EIO?
>   err_uc_init:
> -	intel_uc_fini(dev_priv);
> +	if (ret != -EIO)
> +		intel_uc_fini(dev_priv);
>   err_pm:
>   	if (ret != -EIO) {
>   		intel_cleanup_gt_powersave(dev_priv);
> @@ -5386,10 +5389,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
> -	intel_uc_fini_misc(dev_priv);
> -
> -	if (ret != -EIO)
> +	if (ret != -EIO) {
> +		intel_uc_fini_misc(dev_priv);
>   		i915_gem_cleanup_userptr(dev_priv);
> +	}
>   
>   	if (ret == -EIO) {
>   		/*
Comment here can be updated to say "Allow engines or uC initialization 
to fail ... "
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 52856a9..512ff7b 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -100,6 +100,11 @@ static inline void intel_guc_notify(struct intel_guc *guc)
>   	guc->notify(guc);
>   }
>   
> +static inline bool intel_guc_is_loaded(struct intel_guc *guc)
> +{
> +	return intel_uc_fw_is_loaded(&guc->fw);
> +}
> +
>   /*
>    * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP),
>    * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 9f1bac6..75d0eb9 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -421,11 +421,13 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
>   	 * Note that there is no fallback as either user explicitly asked for
>   	 * the GuC or driver default option was to run with the GuC enabled.
>   	 */
> -	if (GEM_WARN_ON(ret == -EIO))
> -		ret = -EINVAL;
> -
>   	dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret);
> -	return ret;
> +
> +	/* Mark GuC firmware as failed to avoid redundant clean-up */
uc_fini_hw is not redundant. uc_fini[_misc] was redundant. May be we 
should say
"to avoid clean-up on wedged"
> +	guc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
> +
> +	/* We want to disable GPU submission but keep KMS alive */
> +	return -EIO;
>   }
>   
>   void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
> @@ -437,6 +439,9 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
>   
>   	GEM_BUG_ON(!HAS_GUC(dev_priv));
>   
> +	if (!intel_guc_is_loaded(guc))
> +		return;
> +
Can we skip based on i915_terminally_wedged instead?
Similarly wedged check is needed for invoking other portion of 
i915_gem_fini like gem_cleanup_engines, gem_contexts_fini since
we skipped them on -EIO during gem_init.
>   	if (USES_GUC_SUBMISSION(dev_priv))
>   		intel_guc_submission_disable(guc);
>   
> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
> index d5fd460..0e3b237 100644
> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
> @@ -115,6 +115,11 @@ static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
>   	return uc_fw->path != NULL;
>   }
>   
> +static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
> +{
> +	return uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS;
> +}
> +
>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>   		       struct intel_uc_fw *uc_fw);
>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-20 22:57 [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure Michal Wajdeczko
                   ` (3 preceding siblings ...)
  2018-02-21  8:08 ` [PATCH v2 1/2] " Sagar Arun Kamble
@ 2018-02-21  8:27 ` Chris Wilson
  2018-02-22 17:30   ` Michal Wajdeczko
  2018-02-21 16:54 ` Daniele Ceraolo Spurio
  5 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2018-02-21  8:27 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx

Quoting Michal Wajdeczko (2018-02-20 22:57:10)
> Since commit 6ca9a2beb54a ("drm/i915: Unwind i915_gem_init() failure")
> we believed that we correctly handle all errors encountered during
> GuC initialization, including special one that indicates request to
> run driver with disabled GPU submission (-EIO).
> 
> Unfortunately since commit 121981fafe69 ("drm/i915/guc: Combine
> enable_guc_loading|submission modparams") we stopped using that
> error code to avoid unwanted fallback to execlist submission mode.
> 
> In result any GuC initialization failure was treated as non-recoverable
> error leading to driver load abort, so we could not even read related
> GuC error log to investigate cause of the problem.
> 
> Fix that by always returning -EIO on uC hardware related failure.
> 
> v2: don't allow -EIO from uc_init
>     don't call uc_fini[_misc] on -EIO
>     mark guc fw as failed on hw init failure
>     prepare uc_fini_hw to run after earlier -EIO
> 
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>

Whilst thinking about this, do you want to disable (or rather prevent
setup of) guc submission if the driver is already wedged?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-21  8:08 ` [PATCH v2 1/2] " Sagar Arun Kamble
@ 2018-02-21 12:20   ` Michal Wajdeczko
  2018-02-21 16:55     ` Sagar Arun Kamble
  0 siblings, 1 reply; 10+ messages in thread
From: Michal Wajdeczko @ 2018-02-21 12:20 UTC (permalink / raw)
  To: intel-gfx, Sagar Arun Kamble

On Wed, 21 Feb 2018 09:08:08 +0100, Sagar Arun Kamble  
<sagar.a.kamble@intel.com> wrote:

>
>
> On 2/21/2018 4:27 AM, Michal Wajdeczko wrote:
>> Since commit 6ca9a2beb54a ("drm/i915: Unwind i915_gem_init() failure")
>> we believed that we correctly handle all errors encountered during
>> GuC initialization, including special one that indicates request to
>> run driver with disabled GPU submission (-EIO).
>>
>> Unfortunately since commit 121981fafe69 ("drm/i915/guc: Combine
>> enable_guc_loading|submission modparams") we stopped using that
>> error code to avoid unwanted fallback to execlist submission mode.
>>
>> In result any GuC initialization failure was treated as non-recoverable
>> error leading to driver load abort, so we could not even read related
>> GuC error log to investigate cause of the problem.
>>
>> Fix that by always returning -EIO on uC hardware related failure.
>>
>> v2: don't allow -EIO from uc_init
>>      don't call uc_fini[_misc] on -EIO
>>      mark guc fw as failed on hw init failure
>>      prepare uc_fini_hw to run after earlier -EIO
>>
>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c    | 13 ++++++++-----
>>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>>   drivers/gpu/drm/i915/intel_uc.c    | 13 +++++++++----
>>   drivers/gpu/drm/i915/intel_uc_fw.h |  5 +++++
>>   4 files changed, 27 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c  
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 631a2db..80f23a8 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -5324,8 +5324,10 @@ int i915_gem_init(struct drm_i915_private  
>> *dev_priv)
>>   	intel_init_gt_powersave(dev_priv);
>>     	ret = intel_uc_init(dev_priv);
>> -	if (ret)
>> +	if (ret) {
>> +		GEM_BUG_ON(ret == -EIO);
>>   		goto err_pm;
>> +	}
>>     	ret = i915_gem_init_hw(dev_priv);
>>   	if (ret)
>> @@ -5372,7 +5374,8 @@ int i915_gem_init(struct drm_i915_private  
>> *dev_priv)
>>   	i915_gem_contexts_lost(dev_priv);
>>   	intel_uc_fini_hw(dev_priv);
> This uc_fini_hw should also be not called on -EIO?

This one here is fine. But I need to clear guc->fw.load_status
there to make sure we will not try to perform full fini_hw() again.

>>   err_uc_init:
>> -	intel_uc_fini(dev_priv);
>> +	if (ret != -EIO)
>> +		intel_uc_fini(dev_priv);
>>   err_pm:
>>   	if (ret != -EIO) {
>>   		intel_cleanup_gt_powersave(dev_priv);
>> @@ -5386,10 +5389,10 @@ int i915_gem_init(struct drm_i915_private  
>> *dev_priv)
>>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>>   -	intel_uc_fini_misc(dev_priv);
>> -
>> -	if (ret != -EIO)
>> +	if (ret != -EIO) {
>> +		intel_uc_fini_misc(dev_priv);
>>   		i915_gem_cleanup_userptr(dev_priv);
>> +	}
>>     	if (ret == -EIO) {
>>   		/*
> Comment here can be updated to say "Allow engines or uC initialization  
> to fail ... "

ok

>> diff --git a/drivers/gpu/drm/i915/intel_guc.h  
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 52856a9..512ff7b 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -100,6 +100,11 @@ static inline void intel_guc_notify(struct  
>> intel_guc *guc)
>>   	guc->notify(guc);
>>   }
>>   +static inline bool intel_guc_is_loaded(struct intel_guc *guc)
>> +{
>> +	return intel_uc_fw_is_loaded(&guc->fw);
>> +}
>> +
>>   /*
>>    * GuC does not allow any gfx GGTT address that falls into range [0,  
>> WOPCM_TOP),
>>    * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top  
>> address is
>> diff --git a/drivers/gpu/drm/i915/intel_uc.c  
>> b/drivers/gpu/drm/i915/intel_uc.c
>> index 9f1bac6..75d0eb9 100644
>> --- a/drivers/gpu/drm/i915/intel_uc.c
>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>> @@ -421,11 +421,13 @@ int intel_uc_init_hw(struct drm_i915_private  
>> *dev_priv)
>>   	 * Note that there is no fallback as either user explicitly asked for
>>   	 * the GuC or driver default option was to run with the GuC enabled.
>>   	 */
>> -	if (GEM_WARN_ON(ret == -EIO))
>> -		ret = -EINVAL;
>> -
>>   	dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret);
>> -	return ret;
>> +
>> +	/* Mark GuC firmware as failed to avoid redundant clean-up */
> uc_fini_hw is not redundant. uc_fini[_misc] was redundant. May be we  
> should say
> "to avoid clean-up on wedged"

ok

>> +	guc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
>> +
>> +	/* We want to disable GPU submission but keep KMS alive */
>> +	return -EIO;
>>   }
>>     void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
>> @@ -437,6 +439,9 @@ void intel_uc_fini_hw(struct drm_i915_private  
>> *dev_priv)
>>     	GEM_BUG_ON(!HAS_GUC(dev_priv));
>>   +	if (!intel_guc_is_loaded(guc))
>> +		return;
>> +
> Can we skip based on i915_terminally_wedged instead?

I'm not sure, as we declare GPU as wedged not only during gem_init()

> Similarly wedged check is needed for invoking other portion of  
> i915_gem_fini like gem_cleanup_engines, gem_contexts_fini since
> we skipped them on -EIO during gem_init.

Hmm, we guarantee that special -EIO is not triggered *before*
i915_gem_init_hw() so we correctly cleanup these in i915_gem_init()
on error, or in i915_gem_fini() on success/wedged. There is no
need to use i915_terminally_wedged for them.

>>   	if (USES_GUC_SUBMISSION(dev_priv))
>>   		intel_guc_submission_disable(guc);
>>   diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h  
>> b/drivers/gpu/drm/i915/intel_uc_fw.h
>> index d5fd460..0e3b237 100644
>> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
>> @@ -115,6 +115,11 @@ static inline bool intel_uc_fw_is_selected(struct  
>> intel_uc_fw *uc_fw)
>>   	return uc_fw->path != NULL;
>>   }
>>   +static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
>> +{
>> +	return uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS;
>> +}
>> +
>>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>>   		       struct intel_uc_fw *uc_fw);
>>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-20 22:57 [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure Michal Wajdeczko
                   ` (4 preceding siblings ...)
  2018-02-21  8:27 ` Chris Wilson
@ 2018-02-21 16:54 ` Daniele Ceraolo Spurio
  5 siblings, 0 replies; 10+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-02-21 16:54 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx

<snip>

> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
> index d5fd460..0e3b237 100644
> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
> @@ -115,6 +115,11 @@ static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
>   	return uc_fw->path != NULL;
>   }
>   
> +static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
> +{
> +	return uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS;

Since we do not immediately update uc_fw->load_status after full GPU 
reset we have a small window of time during re-initialization where this 
function would falsely return true. We don't hit the issue in this 
patch, but I'd personally prefer not to add this function until 
uc_fw->load_status is correctly updated as we might inadvertently start 
to use it at the wrong time. Alternatively, if you want to merge this 
soon we could read the status from the HW as an initial version and then 
switch to uc_fw->load_status after we've fixed it.

Daniele

> +}
> +
>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>   		       struct intel_uc_fw *uc_fw);
>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-21 12:20   ` Michal Wajdeczko
@ 2018-02-21 16:55     ` Sagar Arun Kamble
  0 siblings, 0 replies; 10+ messages in thread
From: Sagar Arun Kamble @ 2018-02-21 16:55 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx



On 2/21/2018 5:50 PM, Michal Wajdeczko wrote:
> On Wed, 21 Feb 2018 09:08:08 +0100, Sagar Arun Kamble 
> <sagar.a.kamble@intel.com> wrote:
>
>>
>>
>> On 2/21/2018 4:27 AM, Michal Wajdeczko wrote:
>>> Since commit 6ca9a2beb54a ("drm/i915: Unwind i915_gem_init() failure")
>>> we believed that we correctly handle all errors encountered during
>>> GuC initialization, including special one that indicates request to
>>> run driver with disabled GPU submission (-EIO).
>>>
>>> Unfortunately since commit 121981fafe69 ("drm/i915/guc: Combine
>>> enable_guc_loading|submission modparams") we stopped using that
>>> error code to avoid unwanted fallback to execlist submission mode.
>>>
>>> In result any GuC initialization failure was treated as non-recoverable
>>> error leading to driver load abort, so we could not even read related
>>> GuC error log to investigate cause of the problem.
>>>
>>> Fix that by always returning -EIO on uC hardware related failure.
>>>
>>> v2: don't allow -EIO from uc_init
>>>      don't call uc_fini[_misc] on -EIO
>>>      mark guc fw as failed on hw init failure
>>>      prepare uc_fini_hw to run after earlier -EIO
>>>
>>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_gem.c    | 13 ++++++++-----
>>>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>>>   drivers/gpu/drm/i915/intel_uc.c    | 13 +++++++++----
>>>   drivers/gpu/drm/i915/intel_uc_fw.h |  5 +++++
>>>   4 files changed, 27 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>>> b/drivers/gpu/drm/i915/i915_gem.c
>>> index 631a2db..80f23a8 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -5324,8 +5324,10 @@ int i915_gem_init(struct drm_i915_private 
>>> *dev_priv)
>>>       intel_init_gt_powersave(dev_priv);
>>>         ret = intel_uc_init(dev_priv);
>>> -    if (ret)
>>> +    if (ret) {
>>> +        GEM_BUG_ON(ret == -EIO);
>>>           goto err_pm;
>>> +    }
>>>         ret = i915_gem_init_hw(dev_priv);
>>>       if (ret)
>>> @@ -5372,7 +5374,8 @@ int i915_gem_init(struct drm_i915_private 
>>> *dev_priv)
>>>       i915_gem_contexts_lost(dev_priv);
>>>       intel_uc_fini_hw(dev_priv);
>> This uc_fini_hw should also be not called on -EIO?
>
> This one here is fine. But I need to clear guc->fw.load_status
> there to make sure we will not try to perform full fini_hw() again.
Yes. Will need to set load_status as FIRMWARE_FAIL.
>
>>>   err_uc_init:
>>> -    intel_uc_fini(dev_priv);
>>> +    if (ret != -EIO)
>>> +        intel_uc_fini(dev_priv);
>>>   err_pm:
>>>       if (ret != -EIO) {
>>>           intel_cleanup_gt_powersave(dev_priv);
>>> @@ -5386,10 +5389,10 @@ int i915_gem_init(struct drm_i915_private 
>>> *dev_priv)
>>>       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>>>       mutex_unlock(&dev_priv->drm.struct_mutex);
>>>   -    intel_uc_fini_misc(dev_priv);
>>> -
>>> -    if (ret != -EIO)
>>> +    if (ret != -EIO) {
>>> +        intel_uc_fini_misc(dev_priv);
>>>           i915_gem_cleanup_userptr(dev_priv);
>>> +    }
>>>         if (ret == -EIO) {
>>>           /*
>> Comment here can be updated to say "Allow engines or uC 
>> initialization to fail ... "
>
> ok
>
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h 
>>> b/drivers/gpu/drm/i915/intel_guc.h
>>> index 52856a9..512ff7b 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>> @@ -100,6 +100,11 @@ static inline void intel_guc_notify(struct 
>>> intel_guc *guc)
>>>       guc->notify(guc);
>>>   }
>>>   +static inline bool intel_guc_is_loaded(struct intel_guc *guc)
>>> +{
>>> +    return intel_uc_fw_is_loaded(&guc->fw);
>>> +}
>>> +
>>>   /*
>>>    * GuC does not allow any gfx GGTT address that falls into range 
>>> [0, WOPCM_TOP),
>>>    * which is reserved for Boot ROM, SRAM and WOPCM. Currently this 
>>> top address is
>>> diff --git a/drivers/gpu/drm/i915/intel_uc.c 
>>> b/drivers/gpu/drm/i915/intel_uc.c
>>> index 9f1bac6..75d0eb9 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc.c
>>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>>> @@ -421,11 +421,13 @@ int intel_uc_init_hw(struct drm_i915_private 
>>> *dev_priv)
>>>        * Note that there is no fallback as either user explicitly 
>>> asked for
>>>        * the GuC or driver default option was to run with the GuC 
>>> enabled.
>>>        */
>>> -    if (GEM_WARN_ON(ret == -EIO))
>>> -        ret = -EINVAL;
>>> -
>>>       dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", 
>>> ret);
>>> -    return ret;
>>> +
>>> +    /* Mark GuC firmware as failed to avoid redundant clean-up */
>> uc_fini_hw is not redundant. uc_fini[_misc] was redundant. May be we 
>> should say
>> "to avoid clean-up on wedged"
>
> ok
>
>>> +    guc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
>>> +
>>> +    /* We want to disable GPU submission but keep KMS alive */
>>> +    return -EIO;
>>>   }
>>>     void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
>>> @@ -437,6 +439,9 @@ void intel_uc_fini_hw(struct drm_i915_private 
>>> *dev_priv)
>>>         GEM_BUG_ON(!HAS_GUC(dev_priv));
>>>   +    if (!intel_guc_is_loaded(guc))
>>> +        return;
>>> +
>> Can we skip based on i915_terminally_wedged instead?
>
> I'm not sure, as we declare GPU as wedged not only during gem_init()
>
>> Similarly wedged check is needed for invoking other portion of 
>> i915_gem_fini like gem_cleanup_engines, gem_contexts_fini since
>> we skipped them on -EIO during gem_init.
>
> Hmm, we guarantee that special -EIO is not triggered *before*
> i915_gem_init_hw() so we correctly cleanup these in i915_gem_init()
> on error, or in i915_gem_fini() on success/wedged. There is no
> need to use i915_terminally_wedged for them.
>
I thought these functions might touch the GPU when wedged (GTT and 
possibly engine states). But it looks like it does not
create issues, otherwise drv_module_reload would have highlighted. Agree 
that init counterparts not returning -EIO also means
these need not be gated by wedged check.
load_status based check looks good to me.
>>>       if (USES_GUC_SUBMISSION(dev_priv))
>>>           intel_guc_submission_disable(guc);
>>>   diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h 
>>> b/drivers/gpu/drm/i915/intel_uc_fw.h
>>> index d5fd460..0e3b237 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
>>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
>>> @@ -115,6 +115,11 @@ static inline bool 
>>> intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
>>>       return uc_fw->path != NULL;
>>>   }
>>>   +static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
>>> +{
>>> +    return uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS;
>>> +}
>>> +
>>>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>>>                  struct intel_uc_fw *uc_fw);
>>>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure
  2018-02-21  8:27 ` Chris Wilson
@ 2018-02-22 17:30   ` Michal Wajdeczko
  0 siblings, 0 replies; 10+ messages in thread
From: Michal Wajdeczko @ 2018-02-22 17:30 UTC (permalink / raw)
  To: intel-gfx, Chris Wilson

On Wed, 21 Feb 2018 09:27:51 +0100, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Michal Wajdeczko (2018-02-20 22:57:10)
>> Since commit 6ca9a2beb54a ("drm/i915: Unwind i915_gem_init() failure")
>> we believed that we correctly handle all errors encountered during
>> GuC initialization, including special one that indicates request to
>> run driver with disabled GPU submission (-EIO).
>>
>> Unfortunately since commit 121981fafe69 ("drm/i915/guc: Combine
>> enable_guc_loading|submission modparams") we stopped using that
>> error code to avoid unwanted fallback to execlist submission mode.
>>
>> In result any GuC initialization failure was treated as non-recoverable
>> error leading to driver load abort, so we could not even read related
>> GuC error log to investigate cause of the problem.
>>
>> Fix that by always returning -EIO on uC hardware related failure.
>>
>> v2: don't allow -EIO from uc_init
>>     don't call uc_fini[_misc] on -EIO
>>     mark guc fw as failed on hw init failure
>>     prepare uc_fini_hw to run after earlier -EIO
>>
>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Michal Winiarski <michal.winiarski@intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>
> Whilst thinking about this, do you want to disable (or rather prevent
> setup of) guc submission if the driver is already wedged?

We setup GuC submission in intel_uc_init_hw() and just before we call
it from i915_gem_init_hw() we already have a check for wedged driver.
So maybe we don't want to add redundant guard ?

/Michal
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-02-22 17:30 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-20 22:57 [PATCH v2 1/2] drm/i915/guc: Use correct error code for GuC initialization failure Michal Wajdeczko
2018-02-20 22:57 ` [PATCH v2 2/2] HAX: Enable GuC for CI Michal Wajdeczko
2018-02-20 23:36 ` ✓ Fi.CI.BAT: success for series starting with [v2,1/2] drm/i915/guc: Use correct error code for GuC initialization failure Patchwork
2018-02-21  7:18 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-02-21  8:08 ` [PATCH v2 1/2] " Sagar Arun Kamble
2018-02-21 12:20   ` Michal Wajdeczko
2018-02-21 16:55     ` Sagar Arun Kamble
2018-02-21  8:27 ` Chris Wilson
2018-02-22 17:30   ` Michal Wajdeczko
2018-02-21 16:54 ` Daniele Ceraolo Spurio

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.