All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
@ 2018-02-23 14:04 Michal Wajdeczko
  2018-02-23 14:40 ` ✓ Fi.CI.BAT: success for drm/i915/uc: Start preparing GuC/HuC for reset (rev2) Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Michal Wajdeczko @ 2018-02-23 14:04 UTC (permalink / raw)
  To: intel-gfx

Right after GPU reset there will be a small window of time during which
some of GuC/HuC fields will still show state before reset. Let's start
to fix that by sanitizing firmware status as we will use it shortly.

v2: s/reset_prepare/prepare_to_reset (Michel)
    don't forget about gem_sanitize path (Daniele)

Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c    |  5 ++++-
 drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
 drivers/gpu/drm/i915/intel_huc.h   |  5 +++++
 drivers/gpu/drm/i915/intel_uc.c    | 14 ++++++++++++++
 drivers/gpu/drm/i915/intel_uc.h    |  1 +
 drivers/gpu/drm/i915/intel_uc_fw.h |  6 ++++++
 6 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 14c855b..ae2c4ba 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2981,6 +2981,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 	}
 
 	i915_gem_revoke_fences(dev_priv);
+	intel_uc_prepare_to_reset(dev_priv);
 
 	return err;
 }
@@ -4881,8 +4882,10 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * it may impact the display and we are uncertain about the stability
 	 * of the reset, so this could be applied to even earlier gen.
 	 */
-	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
+	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
+		intel_uc_prepare_to_reset(i915);
 		WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
+	}
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 52856a9..0f6adb1 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -132,4 +132,9 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
 u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
 
+static inline void intel_guc_prepare_to_reset(struct intel_guc *guc)
+{
+	intel_uc_fw_prepare_to_reset(&guc->fw);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h
index 40039db..96e24f9 100644
--- a/drivers/gpu/drm/i915/intel_huc.h
+++ b/drivers/gpu/drm/i915/intel_huc.h
@@ -38,4 +38,9 @@ struct intel_huc {
 int intel_huc_init_hw(struct intel_huc *huc);
 int intel_huc_auth(struct intel_huc *huc);
 
+static inline void intel_huc_prepare_to_reset(struct intel_huc *huc)
+{
+	intel_uc_fw_prepare_to_reset(&huc->fw);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 9f1bac6..8042d4b 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -445,3 +445,17 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 	if (USES_GUC_SUBMISSION(dev_priv))
 		gen9_disable_guc_interrupts(dev_priv);
 }
+
+void intel_uc_prepare_to_reset(struct drm_i915_private *i915)
+{
+	struct intel_huc *huc = &i915->huc;
+	struct intel_guc *guc = &i915->guc;
+
+	if (!USES_GUC(i915))
+		return;
+
+	GEM_BUG_ON(!HAS_GUC(i915));
+
+	intel_huc_prepare_to_reset(huc);
+	intel_guc_prepare_to_reset(guc);
+}
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index f2984e0..7a8ae58 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -39,6 +39,7 @@
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_uc_init(struct drm_i915_private *dev_priv);
 void intel_uc_fini(struct drm_i915_private *dev_priv);
+void intel_uc_prepare_to_reset(struct drm_i915_private *dev_priv);
 
 static inline bool intel_uc_is_using_guc(void)
 {
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index d5fd460..f1ee653 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -115,6 +115,12 @@ static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
 	return uc_fw->path != NULL;
 }
 
+static inline void intel_uc_fw_prepare_to_reset(struct intel_uc_fw *uc_fw)
+{
+	if (uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS)
+		uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING;
+}
+
 void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		       struct intel_uc_fw *uc_fw);
 int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/uc: Start preparing GuC/HuC for reset (rev2)
  2018-02-23 14:04 [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Michal Wajdeczko
@ 2018-02-23 14:40 ` Patchwork
  2018-02-23 15:38 ` ✗ Fi.CI.IGT: warning " Patchwork
  2018-02-23 17:01 ` [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Daniele Ceraolo Spurio
  2 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2018-02-23 14:40 UTC (permalink / raw)
  To: Michal Wajdeczko; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/uc: Start preparing GuC/HuC for reset (rev2)
URL   : https://patchwork.freedesktop.org/series/38805/
State : success

== Summary ==

Series 38805v2 drm/i915/uc: Start preparing GuC/HuC for reset
https://patchwork.freedesktop.org/api/1.0/series/38805/revisions/2/mbox/

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:418s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:425s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:375s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:487s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:285s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:481s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:483s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:469s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:457s
fi-cfl-8700k     total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:390s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:564s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:416s
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:287s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:509s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:385s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:410s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:452s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:452s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:494s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:450s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:492s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:593s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:425s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:502s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:522s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:481s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:484s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:407s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:433s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:533s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:393s

562dc33a969ded94e63f6fd1d76eb42d344052fb drm-tip: 2018y-02m-23d-09h-04m-20s UTC integration manifest
d2c3f7fe851c drm/i915/uc: Start preparing GuC/HuC for reset

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8142/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* ✗ Fi.CI.IGT: warning for drm/i915/uc: Start preparing GuC/HuC for reset (rev2)
  2018-02-23 14:04 [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Michal Wajdeczko
  2018-02-23 14:40 ` ✓ Fi.CI.BAT: success for drm/i915/uc: Start preparing GuC/HuC for reset (rev2) Patchwork
@ 2018-02-23 15:38 ` Patchwork
  2018-02-23 17:01 ` [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Daniele Ceraolo Spurio
  2 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2018-02-23 15:38 UTC (permalink / raw)
  To: Michal Wajdeczko; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/uc: Start preparing GuC/HuC for reset (rev2)
URL   : https://patchwork.freedesktop.org/series/38805/
State : warning

== Summary ==

Test kms_flip:
        Subgroup dpms-vs-vblank-race-interruptible:
                fail       -> PASS       (shard-hsw) fdo#103060 +1
Test kms_sysfs_edid_timing:
                warn       -> PASS       (shard-apl) fdo#100047
Test kms_cursor_crc:
        Subgroup cursor-64x64-suspend:
                incomplete -> PASS       (shard-hsw) fdo#103540
Test kms_vblank:
        Subgroup pipe-a-query-busy-hang:
                pass       -> SKIP       (shard-snb)

fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047
fdo#103540 https://bugs.freedesktop.org/show_bug.cgi?id=103540

shard-apl        total:3465 pass:1821 dwarn:1   dfail:0   fail:12  skip:1631 time:12347s
shard-hsw        total:3465 pass:1768 dwarn:1   dfail:0   fail:2   skip:1693 time:11830s
shard-snb        total:3465 pass:1356 dwarn:1   dfail:0   fail:3   skip:2105 time:6599s
Blacklisted hosts:
shard-kbl        total:3399 pass:1921 dwarn:1   dfail:0   fail:14  skip:1462 time:9184s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8142/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-23 14:04 [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Michal Wajdeczko
  2018-02-23 14:40 ` ✓ Fi.CI.BAT: success for drm/i915/uc: Start preparing GuC/HuC for reset (rev2) Patchwork
  2018-02-23 15:38 ` ✗ Fi.CI.IGT: warning " Patchwork
@ 2018-02-23 17:01 ` Daniele Ceraolo Spurio
  2018-02-26  6:17   ` Sagar Arun Kamble
  2 siblings, 1 reply; 11+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-02-23 17:01 UTC (permalink / raw)
  To: Michal Wajdeczko, intel-gfx



On 23/02/18 06:04, Michal Wajdeczko wrote:
> Right after GPU reset there will be a small window of time during which
> some of GuC/HuC fields will still show state before reset. Let's start
> to fix that by sanitizing firmware status as we will use it shortly.
> 
> v2: s/reset_prepare/prepare_to_reset (Michel)
>      don't forget about gem_sanitize path (Daniele)
> 
> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michel Thierry <michel.thierry@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c    |  5 ++++-
>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>   drivers/gpu/drm/i915/intel_huc.h   |  5 +++++
>   drivers/gpu/drm/i915/intel_uc.c    | 14 ++++++++++++++
>   drivers/gpu/drm/i915/intel_uc.h    |  1 +
>   drivers/gpu/drm/i915/intel_uc_fw.h |  6 ++++++
>   6 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 14c855b..ae2c4ba 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2981,6 +2981,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
>   	}
>   
>   	i915_gem_revoke_fences(dev_priv);
> +	intel_uc_prepare_to_reset(dev_priv);
>   
>   	return err;
>   }
> @@ -4881,8 +4882,10 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>   	 * it may impact the display and we are uncertain about the stability
>   	 * of the reset, so this could be applied to even earlier gen.
>   	 */
> -	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
> +	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
> +		intel_uc_prepare_to_reset(i915);

This leaves the status with an incorrect value if we boot with 
i915.reset=0, but I think this is still the right place to add this in. 
There are several things with GuC that are going to break if we use 
reset=0 (e.g. doorbell cleanup) so I wouldn't consider this a 
regression, but we might want to start sanitizing the modparams to not 
allow reset=0 with GuC.

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Daniele

>   		WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
> +	}
>   }
>   
>   int i915_gem_suspend(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 52856a9..0f6adb1 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -132,4 +132,9 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
>   struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
>   u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
>   
> +static inline void intel_guc_prepare_to_reset(struct intel_guc *guc)
> +{
> +	intel_uc_fw_prepare_to_reset(&guc->fw);
> +}
> +
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h
> index 40039db..96e24f9 100644
> --- a/drivers/gpu/drm/i915/intel_huc.h
> +++ b/drivers/gpu/drm/i915/intel_huc.h
> @@ -38,4 +38,9 @@ struct intel_huc {
>   int intel_huc_init_hw(struct intel_huc *huc);
>   int intel_huc_auth(struct intel_huc *huc);
>   
> +static inline void intel_huc_prepare_to_reset(struct intel_huc *huc)
> +{
> +	intel_uc_fw_prepare_to_reset(&huc->fw);
> +}
> +
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 9f1bac6..8042d4b 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -445,3 +445,17 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
>   	if (USES_GUC_SUBMISSION(dev_priv))
>   		gen9_disable_guc_interrupts(dev_priv);
>   }
> +
> +void intel_uc_prepare_to_reset(struct drm_i915_private *i915)
> +{
> +	struct intel_huc *huc = &i915->huc;
> +	struct intel_guc *guc = &i915->guc;
> +
> +	if (!USES_GUC(i915))
> +		return;
> +
> +	GEM_BUG_ON(!HAS_GUC(i915));
> +
> +	intel_huc_prepare_to_reset(huc);
> +	intel_guc_prepare_to_reset(guc);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index f2984e0..7a8ae58 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -39,6 +39,7 @@
>   void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
>   int intel_uc_init(struct drm_i915_private *dev_priv);
>   void intel_uc_fini(struct drm_i915_private *dev_priv);
> +void intel_uc_prepare_to_reset(struct drm_i915_private *dev_priv);
>   
>   static inline bool intel_uc_is_using_guc(void)
>   {
> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
> index d5fd460..f1ee653 100644
> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
> @@ -115,6 +115,12 @@ static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
>   	return uc_fw->path != NULL;
>   }
>   
> +static inline void intel_uc_fw_prepare_to_reset(struct intel_uc_fw *uc_fw)
> +{
> +	if (uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS)
> +		uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING;
> +}
> +
>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>   		       struct intel_uc_fw *uc_fw);
>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-23 17:01 ` [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Daniele Ceraolo Spurio
@ 2018-02-26  6:17   ` Sagar Arun Kamble
  2018-02-26 16:57     ` Daniele Ceraolo Spurio
  0 siblings, 1 reply; 11+ messages in thread
From: Sagar Arun Kamble @ 2018-02-26  6:17 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio, Michal Wajdeczko, intel-gfx



On 2/23/2018 10:31 PM, Daniele Ceraolo Spurio wrote:
>
>
> On 23/02/18 06:04, Michal Wajdeczko wrote:
>> Right after GPU reset there will be a small window of time during which
>> some of GuC/HuC fields will still show state before reset. Let's start
>> to fix that by sanitizing firmware status as we will use it shortly.
>>
>> v2: s/reset_prepare/prepare_to_reset (Michel)
>>      don't forget about gem_sanitize path (Daniele)
>>
>> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Michel Thierry <michel.thierry@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c    |  5 ++++-
>>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>>   drivers/gpu/drm/i915/intel_huc.h   |  5 +++++
>>   drivers/gpu/drm/i915/intel_uc.c    | 14 ++++++++++++++
>>   drivers/gpu/drm/i915/intel_uc.h    |  1 +
>>   drivers/gpu/drm/i915/intel_uc_fw.h |  6 ++++++
>>   6 files changed, 35 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 14c855b..ae2c4ba 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2981,6 +2981,7 @@ int i915_gem_reset_prepare(struct 
>> drm_i915_private *dev_priv)
>>       }
>>         i915_gem_revoke_fences(dev_priv);
>> +    intel_uc_prepare_to_reset(dev_priv);
>>         return err;
>>   }
>> @@ -4881,8 +4882,10 @@ void i915_gem_sanitize(struct drm_i915_private 
>> *i915)
>>        * it may impact the display and we are uncertain about the 
>> stability
>>        * of the reset, so this could be applied to even earlier gen.
>>        */
>> -    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
>> +    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
>> +        intel_uc_prepare_to_reset(i915);
>
> This leaves the status with an incorrect value if we boot with 
> i915.reset=0, 
It depends on whether WOPCM is locked (In case of resume from S3 I have 
seen it to be the case often).
Then we need not reload GuC also unless we are not doing full GPU reset.
> but I think this is still the right place to add this in. 
Yes
> There are several things with GuC that are going to break if we use 
> reset=0 (e.g. doorbell cleanup) 
Can you elaborate how it might break.
i915 isn't currently communicating to GuC (destroy_doorbell) during 
doorbell cleanup and if we start communicating then it should
not fail as GuC will be available with reset=0.  Also 
__intel_uc_reset_hw isn't gated by reset modparam.
> so I wouldn't consider this a regression, but we might want to start 
> sanitizing the modparams to not allow reset=0 with GuC.
>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> Daniele
>
>>           WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
>> +    }
>>   }
>>     int i915_gem_suspend(struct drm_i915_private *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h 
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 52856a9..0f6adb1 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -132,4 +132,9 @@ static inline u32 guc_ggtt_offset(struct i915_vma 
>> *vma)
>>   struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 
>> size);
>>   u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
>>   +static inline void intel_guc_prepare_to_reset(struct intel_guc *guc)
>> +{
>> +    intel_uc_fw_prepare_to_reset(&guc->fw);
>> +}
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/intel_huc.h 
>> b/drivers/gpu/drm/i915/intel_huc.h
>> index 40039db..96e24f9 100644
>> --- a/drivers/gpu/drm/i915/intel_huc.h
>> +++ b/drivers/gpu/drm/i915/intel_huc.h
>> @@ -38,4 +38,9 @@ struct intel_huc {
>>   int intel_huc_init_hw(struct intel_huc *huc);
>>   int intel_huc_auth(struct intel_huc *huc);
>>   +static inline void intel_huc_prepare_to_reset(struct intel_huc *huc)
>> +{
>> +    intel_uc_fw_prepare_to_reset(&huc->fw);
>> +}
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/intel_uc.c 
>> b/drivers/gpu/drm/i915/intel_uc.c
>> index 9f1bac6..8042d4b 100644
>> --- a/drivers/gpu/drm/i915/intel_uc.c
>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>> @@ -445,3 +445,17 @@ void intel_uc_fini_hw(struct drm_i915_private 
>> *dev_priv)
>>       if (USES_GUC_SUBMISSION(dev_priv))
>>           gen9_disable_guc_interrupts(dev_priv);
>>   }
>> +
>> +void intel_uc_prepare_to_reset(struct drm_i915_private *i915)
>> +{
>> +    struct intel_huc *huc = &i915->huc;
>> +    struct intel_guc *guc = &i915->guc;
>> +
>> +    if (!USES_GUC(i915))
>> +        return;
>> +
>> +    GEM_BUG_ON(!HAS_GUC(i915));
>> +
>> +    intel_huc_prepare_to_reset(huc);
>> +    intel_guc_prepare_to_reset(guc);
>> +}
>> diff --git a/drivers/gpu/drm/i915/intel_uc.h 
>> b/drivers/gpu/drm/i915/intel_uc.h
>> index f2984e0..7a8ae58 100644
>> --- a/drivers/gpu/drm/i915/intel_uc.h
>> +++ b/drivers/gpu/drm/i915/intel_uc.h
>> @@ -39,6 +39,7 @@
>>   void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
>>   int intel_uc_init(struct drm_i915_private *dev_priv);
>>   void intel_uc_fini(struct drm_i915_private *dev_priv);
>> +void intel_uc_prepare_to_reset(struct drm_i915_private *dev_priv);
>>     static inline bool intel_uc_is_using_guc(void)
>>   {
>> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h 
>> b/drivers/gpu/drm/i915/intel_uc_fw.h
>> index d5fd460..f1ee653 100644
>> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
>> @@ -115,6 +115,12 @@ static inline bool 
>> intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
>>       return uc_fw->path != NULL;
>>   }
>>   +static inline void intel_uc_fw_prepare_to_reset(struct intel_uc_fw 
>> *uc_fw)
>> +{
>> +    if (uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS)
>> +        uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING;
>> +}
>> +
>>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>>                  struct intel_uc_fw *uc_fw);
>>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>>

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-26  6:17   ` Sagar Arun Kamble
@ 2018-02-26 16:57     ` Daniele Ceraolo Spurio
  2018-02-26 20:52       ` Chris Wilson
  2018-02-27  7:07       ` Sagar Arun Kamble
  0 siblings, 2 replies; 11+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-02-26 16:57 UTC (permalink / raw)
  To: Sagar Arun Kamble, Michal Wajdeczko, intel-gfx



On 25/02/18 22:17, Sagar Arun Kamble wrote:
> 
> 
> On 2/23/2018 10:31 PM, Daniele Ceraolo Spurio wrote:
>>
>>
>> On 23/02/18 06:04, Michal Wajdeczko wrote:
>>> Right after GPU reset there will be a small window of time during which
>>> some of GuC/HuC fields will still show state before reset. Let's start
>>> to fix that by sanitizing firmware status as we will use it shortly.
>>>
>>> v2: s/reset_prepare/prepare_to_reset (Michel)
>>>      don't forget about gem_sanitize path (Daniele)
>>>
>>> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Michel Thierry <michel.thierry@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_gem.c    |  5 ++++-
>>>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>>>   drivers/gpu/drm/i915/intel_huc.h   |  5 +++++
>>>   drivers/gpu/drm/i915/intel_uc.c    | 14 ++++++++++++++
>>>   drivers/gpu/drm/i915/intel_uc.h    |  1 +
>>>   drivers/gpu/drm/i915/intel_uc_fw.h |  6 ++++++
>>>   6 files changed, 35 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>>> b/drivers/gpu/drm/i915/i915_gem.c
>>> index 14c855b..ae2c4ba 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -2981,6 +2981,7 @@ int i915_gem_reset_prepare(struct 
>>> drm_i915_private *dev_priv)
>>>       }
>>>         i915_gem_revoke_fences(dev_priv);
>>> +    intel_uc_prepare_to_reset(dev_priv);
>>>         return err;
>>>   }
>>> @@ -4881,8 +4882,10 @@ void i915_gem_sanitize(struct drm_i915_private 
>>> *i915)
>>>        * it may impact the display and we are uncertain about the 
>>> stability
>>>        * of the reset, so this could be applied to even earlier gen.
>>>        */
>>> -    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
>>> +    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
>>> +        intel_uc_prepare_to_reset(i915);
>>
>> This leaves the status with an incorrect value if we boot with 
>> i915.reset=0, 
> It depends on whether WOPCM is locked (In case of resume from S3 I have 
> seen it to be the case often).
> Then we need not reload GuC also unless we are not doing full GPU reset.
>> but I think this is still the right place to add this in. 
> Yes
>> There are several things with GuC that are going to break if we use 
>> reset=0 (e.g. doorbell cleanup) 
> Can you elaborate how it might break.
> i915 isn't currently communicating to GuC (destroy_doorbell) during 
> doorbell cleanup and if we start communicating then it should
> not fail as GuC will be available with reset=0.  Also 
> __intel_uc_reset_hw isn't gated by reset modparam.

As you said we do always reset GuC no matter the value of the modparam, 
but that does not reset the doorbell HW. If we're coming out of S3 and 
the state as been preserved this will cause the doorbell HW to be left 
in an unclean state, which could cause spurious doorbell interrupts to 
be sent to GuC, not sure how the firmware handles those. The code as 
moved since last time I looked at this in detail and I think we're now 
most likely going to overwrite those unclean doorbells, but there are 
unlikely corner cases (preempt context failing to be created) where we 
might not do so.
More generally, my concern was that in the code flow we assume GuC and 
related HW to be reset and in need of a re-init when we come out of 
suspend when actually as you reported that might not be the case if we 
have reset=0. Even if we have no major concerns now, issues might arise 
in the future after code reworks or new feature additions if we start 
from a wrong assumption. Instead of changing the flow to consider the 
reset=0 (which isn't really a supported scenario) I think it'd be more 
useful to just enforce the fact that we don't support that use-case with 
GuC, hence my suggestion. And yes, I'm probably just being uber-paranoid :P

Daniele

>> so I wouldn't consider this a regression, but we might want to start 
>> sanitizing the modparams to not allow reset=0 with GuC.
>>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>
>> Daniele
>>
>>>           WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
>>> +    }
>>>   }
>>>     int i915_gem_suspend(struct drm_i915_private *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h 
>>> b/drivers/gpu/drm/i915/intel_guc.h
>>> index 52856a9..0f6adb1 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>> @@ -132,4 +132,9 @@ static inline u32 guc_ggtt_offset(struct i915_vma 
>>> *vma)
>>>   struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 
>>> size);
>>>   u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
>>>   +static inline void intel_guc_prepare_to_reset(struct intel_guc *guc)
>>> +{
>>> +    intel_uc_fw_prepare_to_reset(&guc->fw);
>>> +}
>>> +
>>>   #endif
>>> diff --git a/drivers/gpu/drm/i915/intel_huc.h 
>>> b/drivers/gpu/drm/i915/intel_huc.h
>>> index 40039db..96e24f9 100644
>>> --- a/drivers/gpu/drm/i915/intel_huc.h
>>> +++ b/drivers/gpu/drm/i915/intel_huc.h
>>> @@ -38,4 +38,9 @@ struct intel_huc {
>>>   int intel_huc_init_hw(struct intel_huc *huc);
>>>   int intel_huc_auth(struct intel_huc *huc);
>>>   +static inline void intel_huc_prepare_to_reset(struct intel_huc *huc)
>>> +{
>>> +    intel_uc_fw_prepare_to_reset(&huc->fw);
>>> +}
>>> +
>>>   #endif
>>> diff --git a/drivers/gpu/drm/i915/intel_uc.c 
>>> b/drivers/gpu/drm/i915/intel_uc.c
>>> index 9f1bac6..8042d4b 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc.c
>>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>>> @@ -445,3 +445,17 @@ void intel_uc_fini_hw(struct drm_i915_private 
>>> *dev_priv)
>>>       if (USES_GUC_SUBMISSION(dev_priv))
>>>           gen9_disable_guc_interrupts(dev_priv);
>>>   }
>>> +
>>> +void intel_uc_prepare_to_reset(struct drm_i915_private *i915)
>>> +{
>>> +    struct intel_huc *huc = &i915->huc;
>>> +    struct intel_guc *guc = &i915->guc;
>>> +
>>> +    if (!USES_GUC(i915))
>>> +        return;
>>> +
>>> +    GEM_BUG_ON(!HAS_GUC(i915));
>>> +
>>> +    intel_huc_prepare_to_reset(huc);
>>> +    intel_guc_prepare_to_reset(guc);
>>> +}
>>> diff --git a/drivers/gpu/drm/i915/intel_uc.h 
>>> b/drivers/gpu/drm/i915/intel_uc.h
>>> index f2984e0..7a8ae58 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc.h
>>> +++ b/drivers/gpu/drm/i915/intel_uc.h
>>> @@ -39,6 +39,7 @@
>>>   void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
>>>   int intel_uc_init(struct drm_i915_private *dev_priv);
>>>   void intel_uc_fini(struct drm_i915_private *dev_priv);
>>> +void intel_uc_prepare_to_reset(struct drm_i915_private *dev_priv);
>>>     static inline bool intel_uc_is_using_guc(void)
>>>   {
>>> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h 
>>> b/drivers/gpu/drm/i915/intel_uc_fw.h
>>> index d5fd460..f1ee653 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
>>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
>>> @@ -115,6 +115,12 @@ static inline bool 
>>> intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
>>>       return uc_fw->path != NULL;
>>>   }
>>>   +static inline void intel_uc_fw_prepare_to_reset(struct intel_uc_fw 
>>> *uc_fw)
>>> +{
>>> +    if (uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS)
>>> +        uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING;
>>> +}
>>> +
>>>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>>>                  struct intel_uc_fw *uc_fw);
>>>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>>>
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-26 16:57     ` Daniele Ceraolo Spurio
@ 2018-02-26 20:52       ` Chris Wilson
  2018-02-27  6:54         ` Sagar Arun Kamble
  2018-02-27  7:07       ` Sagar Arun Kamble
  1 sibling, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2018-02-26 20:52 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio, Sagar Arun Kamble, Michal Wajdeczko, intel-gfx

Quoting Daniele Ceraolo Spurio (2018-02-26 16:57:11)
> 
> 
> On 25/02/18 22:17, Sagar Arun Kamble wrote:
> > 
> > 
> > On 2/23/2018 10:31 PM, Daniele Ceraolo Spurio wrote:
> >>
> >>
> >> On 23/02/18 06:04, Michal Wajdeczko wrote:
> >>> Right after GPU reset there will be a small window of time during which
> >>> some of GuC/HuC fields will still show state before reset. Let's start
> >>> to fix that by sanitizing firmware status as we will use it shortly.
> >>>
> >>> v2: s/reset_prepare/prepare_to_reset (Michel)
> >>>      don't forget about gem_sanitize path (Daniele)
> >>>
> >>> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> >>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> >>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> >>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> >>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Michel Thierry <michel.thierry@intel.com>
> >>> ---
> >>>   drivers/gpu/drm/i915/i915_gem.c    |  5 ++++-
> >>>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
> >>>   drivers/gpu/drm/i915/intel_huc.h   |  5 +++++
> >>>   drivers/gpu/drm/i915/intel_uc.c    | 14 ++++++++++++++
> >>>   drivers/gpu/drm/i915/intel_uc.h    |  1 +
> >>>   drivers/gpu/drm/i915/intel_uc_fw.h |  6 ++++++
> >>>   6 files changed, 35 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> >>> b/drivers/gpu/drm/i915/i915_gem.c
> >>> index 14c855b..ae2c4ba 100644
> >>> --- a/drivers/gpu/drm/i915/i915_gem.c
> >>> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >>> @@ -2981,6 +2981,7 @@ int i915_gem_reset_prepare(struct 
> >>> drm_i915_private *dev_priv)
> >>>       }
> >>>         i915_gem_revoke_fences(dev_priv);
> >>> +    intel_uc_prepare_to_reset(dev_priv);
> >>>         return err;
> >>>   }
> >>> @@ -4881,8 +4882,10 @@ void i915_gem_sanitize(struct drm_i915_private 
> >>> *i915)
> >>>        * it may impact the display and we are uncertain about the 
> >>> stability
> >>>        * of the reset, so this could be applied to even earlier gen.
> >>>        */
> >>> -    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
> >>> +    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
> >>> +        intel_uc_prepare_to_reset(i915);
> >>
> >> This leaves the status with an incorrect value if we boot with 
> >> i915.reset=0, 
> > It depends on whether WOPCM is locked (In case of resume from S3 I have 
> > seen it to be the case often).
> > Then we need not reload GuC also unless we are not doing full GPU reset.
> >> but I think this is still the right place to add this in. 
> > Yes
> >> There are several things with GuC that are going to break if we use 
> >> reset=0 (e.g. doorbell cleanup) 
> > Can you elaborate how it might break.
> > i915 isn't currently communicating to GuC (destroy_doorbell) during 
> > doorbell cleanup and if we start communicating then it should
> > not fail as GuC will be available with reset=0.  Also 
> > __intel_uc_reset_hw isn't gated by reset modparam.
> 
> As you said we do always reset GuC no matter the value of the modparam, 
> but that does not reset the doorbell HW. If we're coming out of S3 and 
> the state as been preserved this will cause the doorbell HW to be left 
> in an unclean state, which could cause spurious doorbell interrupts to 
> be sent to GuC, not sure how the firmware handles those. The code as 
> moved since last time I looked at this in detail and I think we're now 
> most likely going to overwrite those unclean doorbells, but there are 
> unlikely corner cases (preempt context failing to be created) where we 
> might not do so.

I'm still going "wait, we can put the device into D3 and the GuC is
still powered?" Something feels wrong if the GuC retains state after the
HW is powered down. (So I'm wondering why this isn't just part of the
normal guc init path for module load/resume.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-26 20:52       ` Chris Wilson
@ 2018-02-27  6:54         ` Sagar Arun Kamble
  2018-02-27  7:50           ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Sagar Arun Kamble @ 2018-02-27  6:54 UTC (permalink / raw)
  To: Chris Wilson, Daniele Ceraolo Spurio, Michal Wajdeczko, intel-gfx



On 2/27/2018 2:22 AM, Chris Wilson wrote:
> Quoting Daniele Ceraolo Spurio (2018-02-26 16:57:11)
>>
>> On 25/02/18 22:17, Sagar Arun Kamble wrote:
>>>
>>> On 2/23/2018 10:31 PM, Daniele Ceraolo Spurio wrote:
>>>>
>>>> On 23/02/18 06:04, Michal Wajdeczko wrote:
>>>>> Right after GPU reset there will be a small window of time during which
>>>>> some of GuC/HuC fields will still show state before reset. Let's start
>>>>> to fix that by sanitizing firmware status as we will use it shortly.
>>>>>
>>>>> v2: s/reset_prepare/prepare_to_reset (Michel)
>>>>>       don't forget about gem_sanitize path (Daniele)
>>>>>
>>>>> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>>>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>>>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Michel Thierry <michel.thierry@intel.com>
>>>>> ---
>>>>>    drivers/gpu/drm/i915/i915_gem.c    |  5 ++++-
>>>>>    drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>>>>>    drivers/gpu/drm/i915/intel_huc.h   |  5 +++++
>>>>>    drivers/gpu/drm/i915/intel_uc.c    | 14 ++++++++++++++
>>>>>    drivers/gpu/drm/i915/intel_uc.h    |  1 +
>>>>>    drivers/gpu/drm/i915/intel_uc_fw.h |  6 ++++++
>>>>>    6 files changed, 35 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>>>>> b/drivers/gpu/drm/i915/i915_gem.c
>>>>> index 14c855b..ae2c4ba 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>>> @@ -2981,6 +2981,7 @@ int i915_gem_reset_prepare(struct
>>>>> drm_i915_private *dev_priv)
>>>>>        }
>>>>>          i915_gem_revoke_fences(dev_priv);
>>>>> +    intel_uc_prepare_to_reset(dev_priv);
>>>>>          return err;
>>>>>    }
>>>>> @@ -4881,8 +4882,10 @@ void i915_gem_sanitize(struct drm_i915_private
>>>>> *i915)
>>>>>         * it may impact the display and we are uncertain about the
>>>>> stability
>>>>>         * of the reset, so this could be applied to even earlier gen.
>>>>>         */
>>>>> -    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
>>>>> +    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
>>>>> +        intel_uc_prepare_to_reset(i915);
>>>> This leaves the status with an incorrect value if we boot with
>>>> i915.reset=0,
>>> It depends on whether WOPCM is locked (In case of resume from S3 I have
>>> seen it to be the case often).
>>> Then we need not reload GuC also unless we are not doing full GPU reset.
>>>> but I think this is still the right place to add this in.
>>> Yes
>>>> There are several things with GuC that are going to break if we use
>>>> reset=0 (e.g. doorbell cleanup)
>>> Can you elaborate how it might break.
>>> i915 isn't currently communicating to GuC (destroy_doorbell) during
>>> doorbell cleanup and if we start communicating then it should
>>> not fail as GuC will be available with reset=0.  Also
>>> __intel_uc_reset_hw isn't gated by reset modparam.
>> As you said we do always reset GuC no matter the value of the modparam,
>> but that does not reset the doorbell HW. If we're coming out of S3 and
>> the state as been preserved this will cause the doorbell HW to be left
>> in an unclean state, which could cause spurious doorbell interrupts to
>> be sent to GuC, not sure how the firmware handles those. The code as
>> moved since last time I looked at this in detail and I think we're now
>> most likely going to overwrite those unclean doorbells, but there are
>> unlikely corner cases (preempt context failing to be created) where we
>> might not do so.
> I'm still going "wait, we can put the device into D3 and the GuC is
> still powered?" Something feels wrong if the GuC retains state after the
> HW is powered down.
GuC will be powered down, with RC6. Just that firmware in WOPCM can get 
wiped off if
memory is reset/powered down during resume. In case of mem sleep 
generally WOPCM stays intact and if we exit
RC6 on resume from sleep, firmware will be restored into GuC without 
driver intervention.
But since we do full GPU reset as part of sanitize we have to load it 
from driver again.
>   (So I'm wondering why this isn't just part of the
> normal guc init path for module load/resume.)
> -Chris

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-26 16:57     ` Daniele Ceraolo Spurio
  2018-02-26 20:52       ` Chris Wilson
@ 2018-02-27  7:07       ` Sagar Arun Kamble
  1 sibling, 0 replies; 11+ messages in thread
From: Sagar Arun Kamble @ 2018-02-27  7:07 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio, Michal Wajdeczko, intel-gfx



On 2/26/2018 10:27 PM, Daniele Ceraolo Spurio wrote:
>
>
> On 25/02/18 22:17, Sagar Arun Kamble wrote:
>>
>>
>> On 2/23/2018 10:31 PM, Daniele Ceraolo Spurio wrote:
>>>
>>>
>>> On 23/02/18 06:04, Michal Wajdeczko wrote:
>>>> Right after GPU reset there will be a small window of time during 
>>>> which
>>>> some of GuC/HuC fields will still show state before reset. Let's start
>>>> to fix that by sanitizing firmware status as we will use it shortly.
>>>>
>>>> v2: s/reset_prepare/prepare_to_reset (Michel)
>>>>      don't forget about gem_sanitize path (Daniele)
>>>>
>>>> Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>>> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Michel Thierry <michel.thierry@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_gem.c    |  5 ++++-
>>>>   drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
>>>>   drivers/gpu/drm/i915/intel_huc.h   |  5 +++++
>>>>   drivers/gpu/drm/i915/intel_uc.c    | 14 ++++++++++++++
>>>>   drivers/gpu/drm/i915/intel_uc.h    |  1 +
>>>>   drivers/gpu/drm/i915/intel_uc_fw.h |  6 ++++++
>>>>   6 files changed, 35 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>>>> b/drivers/gpu/drm/i915/i915_gem.c
>>>> index 14c855b..ae2c4ba 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>> @@ -2981,6 +2981,7 @@ int i915_gem_reset_prepare(struct 
>>>> drm_i915_private *dev_priv)
>>>>       }
>>>>         i915_gem_revoke_fences(dev_priv);
>>>> +    intel_uc_prepare_to_reset(dev_priv);
>>>>         return err;
>>>>   }
>>>> @@ -4881,8 +4882,10 @@ void i915_gem_sanitize(struct 
>>>> drm_i915_private *i915)
>>>>        * it may impact the display and we are uncertain about the 
>>>> stability
>>>>        * of the reset, so this could be applied to even earlier gen.
>>>>        */
>>>> -    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
>>>> +    if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) {
>>>> +        intel_uc_prepare_to_reset(i915);
>>>
>>> This leaves the status with an incorrect value if we boot with 
>>> i915.reset=0, 
>> It depends on whether WOPCM is locked (In case of resume from S3 I 
>> have seen it to be the case often).
>> Then we need not reload GuC also unless we are not doing full GPU reset.
>>> but I think this is still the right place to add this in. 
>> Yes
>>> There are several things with GuC that are going to break if we use 
>>> reset=0 (e.g. doorbell cleanup) 
>> Can you elaborate how it might break.
>> i915 isn't currently communicating to GuC (destroy_doorbell) during 
>> doorbell cleanup and if we start communicating then it should
>> not fail as GuC will be available with reset=0.  Also 
>> __intel_uc_reset_hw isn't gated by reset modparam.
>
> As you said we do always reset GuC no matter the value of the 
> modparam, but that does not reset the doorbell HW. If we're coming out 
> of S3 and the state as been preserved this will cause the doorbell HW 
> to be left in an unclean state, which could cause spurious doorbell 
> interrupts to be sent to GuC, not sure how the firmware handles those. 
> The code as moved since last time I looked at this in detail and I 
> think we're now most likely going to overwrite those unclean 
> doorbells, but there are unlikely corner cases (preempt context 
> failing to be created) where we might not do so.
> More generally, my concern was that in the code flow we assume GuC and 
> related HW to be reset and in need of a re-init when we come out of 
> suspend when actually as you reported that might not be the case if we 
> have reset=0. Even if we have no major concerns now, issues might 
> arise in the future after code reworks or new feature additions if we 
> start from a wrong assumption. Instead of changing the flow to 
> consider the reset=0 (which isn't really a supported scenario) I think 
> it'd be more useful to just enforce the fact that we don't support 
> that use-case with GuC, hence my suggestion. And yes, I'm probably 
> just being uber-paranoid :P
>
Makes sense ..... Agree on sanitizing with GuC to now allow reset=0
We could also fix this if we could reset doorbell unit alone at resume 
and acquire needed doorbells but AFAIK earlier guc_init_doorbell_hw is 
the way to reset all doorbells (that needed GuC). As you said we can 
skip these changes though since reset=0 isn't supported scenario.
> Daniele
>
>>> so I wouldn't consider this a regression, but we might want to start 
>>> sanitizing the modparams to not allow reset=0 with GuC.
>>>
>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>>
>>> Daniele
>>>
>>>> WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
>>>> +    }
>>>>   }
>>>>     int i915_gem_suspend(struct drm_i915_private *dev_priv)
>>>> diff --git a/drivers/gpu/drm/i915/intel_guc.h 
>>>> b/drivers/gpu/drm/i915/intel_guc.h
>>>> index 52856a9..0f6adb1 100644
>>>> --- a/drivers/gpu/drm/i915/intel_guc.h
>>>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>>>> @@ -132,4 +132,9 @@ static inline u32 guc_ggtt_offset(struct 
>>>> i915_vma *vma)
>>>>   struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, 
>>>> u32 size);
>>>>   u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
>>>>   +static inline void intel_guc_prepare_to_reset(struct intel_guc 
>>>> *guc)
>>>> +{
>>>> +    intel_uc_fw_prepare_to_reset(&guc->fw);
>>>> +}
>>>> +
>>>>   #endif
>>>> diff --git a/drivers/gpu/drm/i915/intel_huc.h 
>>>> b/drivers/gpu/drm/i915/intel_huc.h
>>>> index 40039db..96e24f9 100644
>>>> --- a/drivers/gpu/drm/i915/intel_huc.h
>>>> +++ b/drivers/gpu/drm/i915/intel_huc.h
>>>> @@ -38,4 +38,9 @@ struct intel_huc {
>>>>   int intel_huc_init_hw(struct intel_huc *huc);
>>>>   int intel_huc_auth(struct intel_huc *huc);
>>>>   +static inline void intel_huc_prepare_to_reset(struct intel_huc 
>>>> *huc)
>>>> +{
>>>> +    intel_uc_fw_prepare_to_reset(&huc->fw);
>>>> +}
>>>> +
>>>>   #endif
>>>> diff --git a/drivers/gpu/drm/i915/intel_uc.c 
>>>> b/drivers/gpu/drm/i915/intel_uc.c
>>>> index 9f1bac6..8042d4b 100644
>>>> --- a/drivers/gpu/drm/i915/intel_uc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_uc.c
>>>> @@ -445,3 +445,17 @@ void intel_uc_fini_hw(struct drm_i915_private 
>>>> *dev_priv)
>>>>       if (USES_GUC_SUBMISSION(dev_priv))
>>>>           gen9_disable_guc_interrupts(dev_priv);
>>>>   }
>>>> +
>>>> +void intel_uc_prepare_to_reset(struct drm_i915_private *i915)
>>>> +{
>>>> +    struct intel_huc *huc = &i915->huc;
>>>> +    struct intel_guc *guc = &i915->guc;
>>>> +
>>>> +    if (!USES_GUC(i915))
>>>> +        return;
>>>> +
>>>> +    GEM_BUG_ON(!HAS_GUC(i915));
>>>> +
>>>> +    intel_huc_prepare_to_reset(huc);
>>>> +    intel_guc_prepare_to_reset(guc);
>>>> +}
>>>> diff --git a/drivers/gpu/drm/i915/intel_uc.h 
>>>> b/drivers/gpu/drm/i915/intel_uc.h
>>>> index f2984e0..7a8ae58 100644
>>>> --- a/drivers/gpu/drm/i915/intel_uc.h
>>>> +++ b/drivers/gpu/drm/i915/intel_uc.h
>>>> @@ -39,6 +39,7 @@
>>>>   void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
>>>>   int intel_uc_init(struct drm_i915_private *dev_priv);
>>>>   void intel_uc_fini(struct drm_i915_private *dev_priv);
>>>> +void intel_uc_prepare_to_reset(struct drm_i915_private *dev_priv);
>>>>     static inline bool intel_uc_is_using_guc(void)
>>>>   {
>>>> diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h 
>>>> b/drivers/gpu/drm/i915/intel_uc_fw.h
>>>> index d5fd460..f1ee653 100644
>>>> --- a/drivers/gpu/drm/i915/intel_uc_fw.h
>>>> +++ b/drivers/gpu/drm/i915/intel_uc_fw.h
>>>> @@ -115,6 +115,12 @@ static inline bool 
>>>> intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw)
>>>>       return uc_fw->path != NULL;
>>>>   }
>>>>   +static inline void intel_uc_fw_prepare_to_reset(struct 
>>>> intel_uc_fw *uc_fw)
>>>> +{
>>>> +    if (uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS)
>>>> +        uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING;
>>>> +}
>>>> +
>>>>   void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
>>>>                  struct intel_uc_fw *uc_fw);
>>>>   int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
>>>>
>>

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-27  6:54         ` Sagar Arun Kamble
@ 2018-02-27  7:50           ` Chris Wilson
  2018-03-01 21:55             ` Daniele Ceraolo Spurio
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2018-02-27  7:50 UTC (permalink / raw)
  To: Sagar Arun Kamble, Daniele Ceraolo Spurio, Michal Wajdeczko, intel-gfx

Quoting Sagar Arun Kamble (2018-02-27 06:54:46)
> 
> 
> On 2/27/2018 2:22 AM, Chris Wilson wrote:
> > Quoting Daniele Ceraolo Spurio (2018-02-26 16:57:11)
> >> As you said we do always reset GuC no matter the value of the modparam,
> >> but that does not reset the doorbell HW. If we're coming out of S3 and
> >> the state as been preserved this will cause the doorbell HW to be left
> >> in an unclean state, which could cause spurious doorbell interrupts to
> >> be sent to GuC, not sure how the firmware handles those. The code as
> >> moved since last time I looked at this in detail and I think we're now
> >> most likely going to overwrite those unclean doorbells, but there are
> >> unlikely corner cases (preempt context failing to be created) where we
> >> might not do so.
> > I'm still going "wait, we can put the device into D3 and the GuC is
> > still powered?" Something feels wrong if the GuC retains state after the
> > HW is powered down.
> GuC will be powered down, with RC6. Just that firmware in WOPCM can get 
> wiped off if
> memory is reset/powered down during resume. In case of mem sleep 
> generally WOPCM stays intact and if we exit
> RC6 on resume from sleep, firmware will be restored into GuC without 
> driver intervention.
> But since we do full GPU reset as part of sanitize we have to load it 
> from driver again.

On resume, we don't know if we are coming from module load, resume from
S3, resume S3+RST, resume from S4, or resume from kexec. (S3+RST, kexec
are truly without our knowledge, the others we could feed the
information through but RST makes that moot.) Ergo, you cannot know if
the right fw image is loaded and aiui you should treat the state as
undefined and always reload. Does that make sense? Is there a way you
can query what fw is loaded and so skip instead?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset
  2018-02-27  7:50           ` Chris Wilson
@ 2018-03-01 21:55             ` Daniele Ceraolo Spurio
  0 siblings, 0 replies; 11+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-01 21:55 UTC (permalink / raw)
  To: Chris Wilson, Sagar Arun Kamble, Michal Wajdeczko, intel-gfx



On 26/02/18 23:50, Chris Wilson wrote:
> Quoting Sagar Arun Kamble (2018-02-27 06:54:46)
>>
>>
>> On 2/27/2018 2:22 AM, Chris Wilson wrote:
>>> Quoting Daniele Ceraolo Spurio (2018-02-26 16:57:11)
>>>> As you said we do always reset GuC no matter the value of the modparam,
>>>> but that does not reset the doorbell HW. If we're coming out of S3 and
>>>> the state as been preserved this will cause the doorbell HW to be left
>>>> in an unclean state, which could cause spurious doorbell interrupts to
>>>> be sent to GuC, not sure how the firmware handles those. The code as
>>>> moved since last time I looked at this in detail and I think we're now
>>>> most likely going to overwrite those unclean doorbells, but there are
>>>> unlikely corner cases (preempt context failing to be created) where we
>>>> might not do so.
>>> I'm still going "wait, we can put the device into D3 and the GuC is
>>> still powered?" Something feels wrong if the GuC retains state after the
>>> HW is powered down.
>> GuC will be powered down, with RC6. Just that firmware in WOPCM can get
>> wiped off if
>> memory is reset/powered down during resume. In case of mem sleep
>> generally WOPCM stays intact and if we exit
>> RC6 on resume from sleep, firmware will be restored into GuC without
>> driver intervention.
>> But since we do full GPU reset as part of sanitize we have to load it
>> from driver again.
> 
> On resume, we don't know if we are coming from module load, resume from
> S3, resume S3+RST, resume from S4, or resume from kexec. (S3+RST, kexec
> are truly without our knowledge, the others we could feed the
> information through but RST makes that moot.) Ergo, you cannot know if
> the right fw image is loaded and aiui you should treat the state as
> undefined and always reload. Does that make sense? Is there a way you
> can query what fw is loaded and so skip instead?
> -Chris
> 

Not sure if there is already a way to query the FW version, but we could 
ask for a new H2G to be added if there isn't. However, even if the 
firmware version matches, if we can't confirm it is exactly the one we 
loaded (and not a reload of the same version) there is no guarantee that 
its internal state is what we expect. Also, we would have to stop doing 
full gpu resets around suspend/resume which I doubt it is something we want.

Daniele

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-03-01 21:56 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-23 14:04 [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Michal Wajdeczko
2018-02-23 14:40 ` ✓ Fi.CI.BAT: success for drm/i915/uc: Start preparing GuC/HuC for reset (rev2) Patchwork
2018-02-23 15:38 ` ✗ Fi.CI.IGT: warning " Patchwork
2018-02-23 17:01 ` [PATCH v2] drm/i915/uc: Start preparing GuC/HuC for reset Daniele Ceraolo Spurio
2018-02-26  6:17   ` Sagar Arun Kamble
2018-02-26 16:57     ` Daniele Ceraolo Spurio
2018-02-26 20:52       ` Chris Wilson
2018-02-27  6:54         ` Sagar Arun Kamble
2018-02-27  7:50           ` Chris Wilson
2018-03-01 21:55             ` Daniele Ceraolo Spurio
2018-02-27  7:07       ` Sagar Arun Kamble

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.