All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/4] drm/i915: Always do WOPCM partitioning based on real firmware sizes
@ 2018-04-10  0:42 Jackie Li
  2018-04-10  0:42 ` [PATCH v3 2/4] drm/i915: Always set HUC_LOADING_AGENT_GUC bit in WOPCM offset register Jackie Li
                   ` (6 more replies)
  0 siblings, 7 replies; 22+ messages in thread
From: Jackie Li @ 2018-04-10  0:42 UTC (permalink / raw)
  To: intel-gfx

After enabled the WOPCM write-once registers locking status checking,
reloading of the i915 module will fail with modparam enable_guc set to 3
(enable GuC and HuC firmware loading) if the module was originally loaded
with enable_guc set to 1 (only enable GuC firmware loading). This is
because WOPCM registers were updated and locked without considering the HuC
FW size. Since we need both GuC and HuC FW sizes to determine the final
layout of WOPCM, we should always calculate the WOPCM layout based on the
actual sizes of the GuC and HuC firmware available for a specific platform
if we need continue to support enable/disable HuC FW loading dynamically
with enable_guc modparam.

This patch splits uC firmware fetching into two stages. First stage is to
fetch the firmware image and verify the firmware header. uC firmware will
be marked as verified and this will make FW info available for following
WOPCM layout calculation. The second stage is to create a GEM object and
copy the FW data into the created GEM object which will only be available
when GuC/HuC loading is enabled by enable_guc modparam. This will guarantee
that the WOPCM layout will be always be calculated correctly without making
any assumptions to the GuC and HuC firmware sizes.

v3:
 - Rebase

Signed-off-by: Jackie Li <yaodong.li@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: John Spotswood <john.a.spotswood@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_uc.c    | 14 ++++----------
 drivers/gpu/drm/i915/intel_uc_fw.c | 31 ++++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_uc_fw.h |  7 +++++--
 3 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 1cffaf7..73b8f6c 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -172,11 +172,8 @@ void intel_uc_init_early(struct drm_i915_private *i915)
 
 	sanitize_options_early(i915);
 
-	if (USES_GUC(i915))
-		intel_uc_fw_fetch(i915, &guc->fw);
-
-	if (USES_HUC(i915))
-		intel_uc_fw_fetch(i915, &huc->fw);
+	intel_uc_fw_fetch(i915, &guc->fw, USES_GUC(i915));
+	intel_uc_fw_fetch(i915, &huc->fw, USES_HUC(i915));
 }
 
 void intel_uc_cleanup_early(struct drm_i915_private *i915)
@@ -184,11 +181,8 @@ void intel_uc_cleanup_early(struct drm_i915_private *i915)
 	struct intel_guc *guc = &i915->guc;
 	struct intel_huc *huc = &i915->huc;
 
-	if (USES_HUC(i915))
-		intel_uc_fw_fini(&huc->fw);
-
-	if (USES_GUC(i915))
-		intel_uc_fw_fini(&guc->fw);
+	intel_uc_fw_fini(&huc->fw);
+	intel_uc_fw_fini(&guc->fw);
 
 	guc_free_load_err_log(guc);
 }
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index 6e8e0b5..a9cb900 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -33,11 +33,13 @@
  *
  * @dev_priv: device private
  * @uc_fw: uC firmware
+ * @copy_to_obj: whether fetch uC firmware into GEM object or not
  *
- * Fetch uC firmware into GEM obj.
+ * Fetch and verify uC firmware and copy firmware data into GEM object if
+ * @copy_to_obj is true.
  */
 void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
-		       struct intel_uc_fw *uc_fw)
+		       struct intel_uc_fw *uc_fw, bool copy_to_obj)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct drm_i915_gem_object *obj;
@@ -154,17 +156,24 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		goto fail;
 	}
 
-	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		DRM_DEBUG_DRIVER("%s fw object_create err=%d\n",
-				 intel_uc_fw_type_repr(uc_fw->type), err);
-		goto fail;
+	uc_fw->size = fw->size;
+	uc_fw->fetch_status = INTEL_UC_FIRMWARE_VERIFIED;
+
+	if (copy_to_obj) {
+		obj = i915_gem_object_create_from_data(dev_priv, fw->data,
+						       fw->size);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			DRM_DEBUG_DRIVER("%s fw object_create err=%d\n",
+					 intel_uc_fw_type_repr(uc_fw->type),
+					 err);
+			goto fail;
+		}
+
+		uc_fw->obj = obj;
+		uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS;
 	}
 
-	uc_fw->obj = obj;
-	uc_fw->size = fw->size;
-	uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS;
 	DRM_DEBUG_DRIVER("%s fw fetch %s\n",
 			 intel_uc_fw_type_repr(uc_fw->type),
 			 intel_uc_fw_status_repr(uc_fw->fetch_status));
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index dc33b12..4e7ecc8 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -36,6 +36,7 @@ enum intel_uc_fw_status {
 	INTEL_UC_FIRMWARE_FAIL = -1,
 	INTEL_UC_FIRMWARE_NONE = 0,
 	INTEL_UC_FIRMWARE_PENDING,
+	INTEL_UC_FIRMWARE_VERIFIED,
 	INTEL_UC_FIRMWARE_SUCCESS
 };
 
@@ -84,6 +85,8 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
 		return "NONE";
 	case INTEL_UC_FIRMWARE_PENDING:
 		return "PENDING";
+	case INTEL_UC_FIRMWARE_VERIFIED:
+		return "VERIFIED";
 	case INTEL_UC_FIRMWARE_SUCCESS:
 		return "SUCCESS";
 	}
@@ -131,14 +134,14 @@ static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw)
  */
 static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
 {
-	if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
+	if (uc_fw->fetch_status < INTEL_UC_FIRMWARE_VERIFIED)
 		return 0;
 
 	return uc_fw->header_size + uc_fw->ucode_size;
 }
 
 void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
-		       struct intel_uc_fw *uc_fw);
+		       struct intel_uc_fw *uc_fw, bool copy_to_obj);
 int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
 		       int (*xfer)(struct intel_uc_fw *uc_fw,
 				   struct i915_vma *vma));
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread
* [PATCH v3 1/4] drm/i915/uc: Use correct error code for GuC initialization failure
@ 2018-03-13 13:54 Michal Wajdeczko
  2018-03-13 13:54 ` [PATCH v3 4/4] HAX: Enable GuC for CI Michal Wajdeczko
  0 siblings, 1 reply; 22+ messages in thread
From: Michal Wajdeczko @ 2018-03-13 13:54 UTC (permalink / raw)
  To: intel-gfx

Since commit 6ca9a2beb54a ("drm/i915: Unwind i915_gem_init() failure")
we believed that we correctly handle all errors encountered during
GuC initialization, including special one that indicates request to
run driver with disabled GPU submission (-EIO).

Unfortunately since commit 121981fafe69 ("drm/i915/guc: Combine
enable_guc_loading|submission modparams") we stopped using that
error code to avoid unwanted fallback to execlist submission mode.

In result any GuC initialization failure was treated as non-recoverable
error leading to driver load abort, so we could not even read related
GuC error log to investigate cause of the problem.

Fix that by always returning -EIO on uC hardware related failure.

v2: don't allow -EIO from uc_init
    don't call uc_fini[_misc] on -EIO
    mark guc fw as failed on hw init failure
    prepare uc_fini_hw to run after earlier -EIO

v3: update comments (Sagar)
    use sanitize functions on failure in init_hw (Michal)
    and also sanitize guc/huc fw in fini_hw (Michal)

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c    | 17 ++++++++++-------
 drivers/gpu/drm/i915/intel_guc.h   |  5 +++++
 drivers/gpu/drm/i915/intel_uc.c    | 18 ++++++++++++++----
 drivers/gpu/drm/i915/intel_uc_fw.h |  5 +++++
 4 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 05b0724..8eed87d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5338,8 +5338,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_init_gt_powersave(dev_priv);
 
 	ret = intel_uc_init(dev_priv);
-	if (ret)
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
 		goto err_pm;
+	}
 
 	ret = i915_gem_init_hw(dev_priv);
 	if (ret)
@@ -5386,7 +5388,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	i915_gem_contexts_lost(dev_priv);
 	intel_uc_fini_hw(dev_priv);
 err_uc_init:
-	intel_uc_fini(dev_priv);
+	if (ret != -EIO)
+		intel_uc_fini(dev_priv);
 err_pm:
 	if (ret != -EIO) {
 		intel_cleanup_gt_powersave(dev_priv);
@@ -5400,15 +5403,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_uc_fini_misc(dev_priv);
-
-	if (ret != -EIO)
+	if (ret != -EIO) {
+		intel_uc_fini_misc(dev_priv);
 		i915_gem_cleanup_userptr(dev_priv);
+	}
 
 	if (ret == -EIO) {
 		/*
-		 * Allow engine initialisation to fail by marking the GPU as
-		 * wedged. But we only want to do this where the GPU is angry,
+		 * Allow engines or uC initialization to fail by marking the GPU
+		 * as wedged. But we only want to do this when the GPU is angry,
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index d878160..faa9e01 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -139,4 +139,9 @@ static inline int intel_guc_sanitize(struct intel_guc *guc)
 	return 0;
 }
 
+static inline bool intel_guc_is_loaded(struct intel_guc *guc)
+{
+	return intel_uc_fw_is_loaded(&guc->fw);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 9d5ffd7..f89acf4 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -447,15 +447,20 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	 * Note that there is no fallback as either user explicitly asked for
 	 * the GuC or driver default option was to run with the GuC enabled.
 	 */
-	if (GEM_WARN_ON(ret == -EIO))
-		ret = -EINVAL;
-
 	dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret);
-	return ret;
+
+	/* Sanitize GuC/HuC to avoid clean-up on wedged */
+	intel_huc_sanitize(huc);
+	intel_guc_sanitize(guc);
+	GEM_BUG_ON(intel_guc_is_loaded(guc));
+
+	/* We want to disable GPU submission but keep KMS alive */
+	return -EIO;
 }
 
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 {
+	struct intel_huc *huc = &dev_priv->huc;
 	struct intel_guc *guc = &dev_priv->guc;
 
 	if (!USES_GUC(dev_priv))
@@ -463,10 +468,15 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 
+	if (!intel_guc_is_loaded(guc))
+		return;
+
 	if (USES_GUC_SUBMISSION(dev_priv))
 		intel_guc_submission_disable(guc);
 
 	guc_disable_communication(guc);
+	intel_huc_sanitize(huc);
+	intel_guc_sanitize(guc);
 
 	if (USES_GUC_SUBMISSION(dev_priv))
 		gen9_disable_guc_interrupts(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index 2601521..ff9ce9a 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -121,6 +121,11 @@ static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw)
 		uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING;
 }
 
+static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
+{
+	return uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS;
+}
+
 void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		       struct intel_uc_fw *uc_fw);
 int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2018-04-19 21:53 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-10  0:42 [PATCH v3 1/4] drm/i915: Always do WOPCM partitioning based on real firmware sizes Jackie Li
2018-04-10  0:42 ` [PATCH v3 2/4] drm/i915: Always set HUC_LOADING_AGENT_GUC bit in WOPCM offset register Jackie Li
2018-04-13 23:34   ` John Spotswood
2018-04-14  2:26   ` Michal Wajdeczko
2018-04-16 17:43     ` Yaodong Li
2018-04-19 15:52       ` Michal Wajdeczko
2018-04-19 21:26         ` Yaodong Li
2018-04-10  0:42 ` [PATCH v3 3/4] drm/i915: Add code to accept valid locked WOPCM register values Jackie Li
2018-04-13 23:34   ` John Spotswood
2018-04-14  4:20   ` Michal Wajdeczko
2018-04-16 18:43     ` Yaodong Li
2018-04-19 16:37       ` Michal Wajdeczko
2018-04-19 21:50         ` Yaodong Li
2018-04-10  0:42 ` [PATCH v3 4/4] HAX enable guc for CI Jackie Li
2018-04-10  1:26 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/4] drm/i915: Always do WOPCM partitioning based on real firmware sizes Patchwork
2018-04-10  3:03 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-04-13 23:33 ` [PATCH v3 1/4] " John Spotswood
2018-04-14  2:15 ` Michal Wajdeczko
2018-04-16 17:28   ` Yaodong Li
2018-04-19 15:31     ` Michal Wajdeczko
2018-04-19 21:17       ` Yaodong Li
  -- strict thread matches above, loose matches on Subject: below --
2018-03-13 13:54 [PATCH v3 1/4] drm/i915/uc: Use correct error code for GuC initialization failure Michal Wajdeczko
2018-03-13 13:54 ` [PATCH v3 4/4] HAX: Enable GuC for CI Michal Wajdeczko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.