All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Ceraolo Spurio, Daniele" <daniele.ceraolospurio@intel.com>
To: <John.C.Harrison@Intel.com>, <Intel-GFX@Lists.FreeDesktop.Org>
Cc: DRI-Devel@Lists.FreeDesktop.Org
Subject: Re: [Intel-gfx] [PATCH v3 4/6] drm/i915/uc: Enhancements to firmware table validation
Date: Fri, 5 May 2023 18:58:27 +0200	[thread overview]
Message-ID: <2c5bfdab-1adb-b022-ca58-92a90f2a13d5@intel.com> (raw)
In-Reply-To: <20230502234007.1762014-5-John.C.Harrison@Intel.com>



On 5/3/2023 1:40 AM, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The validation of the firmware table was being done inside the code
> for scanning the table for the next available firmware blob. Which is
> unnecessary. So pull it out into a separate function that is only
> called once per blob type at init time.
>
> Also, drop the CONFIG_SELFTEST requirement and make errors terminal.
> It was mentioned that potential issues with backports would not be
> caught by regular pre-merge CI as that only occurs on tip not stable
> branches. Making the validation unconditional and failing driver load
> on detecting of a problem ensures that such backports will also be
> validated correctly.
>
> This requires adding a firmware global flag to indicate an issue with
> any of the per firmware tables. This is done rather than adding a new
> state enum as a new enum value would be a much more invasive change -
> lots of places would need updating to support the new error state.
>
> Note also that this change means that a table error will cause the
> driver to wedge even on platforms that don't require firmware files.
> This is intentional as per the above backport concern - someone doing
> backports is not guaranteed to test on every platform that they may
> potential affect. So forcing a failure on all platforms ensures that
> the problem will be noticed and corrected immediately.
>
> v2: Change to unconditionally fail module load on a validation error
> (review feedback/discussion with Daniele).
> v3: Add a new flag to track table validation errors (review
> feedback/discussion with Daniele).
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Daniele

> ---
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c    |   3 +
>   drivers/gpu/drm/i915/gt/uc/intel_uc.h    |   1 +
>   drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 161 +++++++++++++----------
>   3 files changed, 99 insertions(+), 66 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 996168312340e..1381943b8973d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -432,6 +432,9 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc)
>   
>   static int __uc_check_hw(struct intel_uc *uc)
>   {
> +	if (uc->fw_table_invalid)
> +		return -EIO;
> +
>   	if (!intel_uc_supports_guc(uc))
>   		return 0;
>   
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> index 5d0f1bcc381e8..d585524d94deb 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> @@ -36,6 +36,7 @@ struct intel_uc {
>   	struct drm_i915_gem_object *load_err_log;
>   
>   	bool reset_in_progress;
> +	bool fw_table_invalid;
>   };
>   
>   void intel_uc_init_early(struct intel_uc *uc);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index 55e50bd08d7ff..64e19688788d1 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -233,20 +233,22 @@ struct fw_blobs_by_type {
>   	u32 count;
>   };
>   
> +static const struct uc_fw_platform_requirement blobs_guc[] = {
> +	INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
> +};
> +
> +static const struct uc_fw_platform_requirement blobs_huc[] = {
> +	INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
> +};
> +
> +static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
> +	[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
> +	[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
> +};
> +
>   static void
>   __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
>   {
> -	static const struct uc_fw_platform_requirement blobs_guc[] = {
> -		INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
> -	};
> -	static const struct uc_fw_platform_requirement blobs_huc[] = {
> -		INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
> -	};
> -	static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
> -		[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
> -		[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
> -	};
> -	static bool verified[INTEL_UC_FW_NUM_TYPES];
>   	const struct uc_fw_platform_requirement *fw_blobs;
>   	enum intel_platform p = INTEL_INFO(i915)->platform;
>   	u32 fw_count;
> @@ -286,6 +288,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
>   			continue;
>   
>   		if (uc_fw->file_selected.path) {
> +			/*
> +			 * Continuing an earlier search after a found blob failed to load.
> +			 * Once the previously chosen path has been found, clear it out
> +			 * and let the search continue from there.
> +			 */
>   			if (uc_fw->file_selected.path == blob->path)
>   				uc_fw->file_selected.path = NULL;
>   
> @@ -306,76 +313,91 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
>   		/* Failed to find a match for the last attempt?! */
>   		uc_fw->file_selected.path = NULL;
>   	}
> +}
>   
> -	/* make sure the list is ordered as expected */
> -	if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified[uc_fw->type]) {
> -		verified[uc_fw->type] = true;
> +static bool validate_fw_table_type(struct drm_i915_private *i915, enum intel_uc_fw_type type)
> +{
> +	const struct uc_fw_platform_requirement *fw_blobs;
> +	u32 fw_count;
> +	int i;
>   
> -		for (i = 1; i < fw_count; i++) {
> -			/* Next platform is good: */
> -			if (fw_blobs[i].p < fw_blobs[i - 1].p)
> -				continue;
> +	if (type >= ARRAY_SIZE(blobs_all)) {
> +		drm_err(&i915->drm, "No blob array for %s\n", intel_uc_fw_type_repr(type));
> +		return false;
> +	}
>   
> -			/* Next platform revision is good: */
> -			if (fw_blobs[i].p == fw_blobs[i - 1].p &&
> -			    fw_blobs[i].rev < fw_blobs[i - 1].rev)
> -				continue;
> +	fw_blobs = blobs_all[type].blobs;
> +	fw_count = blobs_all[type].count;
>   
> -			/* Platform/revision must be in order: */
> -			if (fw_blobs[i].p != fw_blobs[i - 1].p ||
> -			    fw_blobs[i].rev != fw_blobs[i - 1].rev)
> -				goto bad;
> +	if (!fw_count)
> +		return true;
>   
> -			/* Next major version is good: */
> -			if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major)
> -				continue;
> +	/* make sure the list is ordered as expected */
> +	for (i = 1; i < fw_count; i++) {
> +		/* Next platform is good: */
> +		if (fw_blobs[i].p < fw_blobs[i - 1].p)
> +			continue;
>   
> -			/* New must be before legacy: */
> -			if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy)
> -				goto bad;
> +		/* Next platform revision is good: */
> +		if (fw_blobs[i].p == fw_blobs[i - 1].p &&
> +		    fw_blobs[i].rev < fw_blobs[i - 1].rev)
> +			continue;
>   
> -			/* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */
> -			if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) {
> -				if (!fw_blobs[i - 1].blob.major)
> -					continue;
> +		/* Platform/revision must be in order: */
> +		if (fw_blobs[i].p != fw_blobs[i - 1].p ||
> +		    fw_blobs[i].rev != fw_blobs[i - 1].rev)
> +			goto bad;
>   
> -				if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major)
> -					continue;
> -			}
> +		/* Next major version is good: */
> +		if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major)
> +			continue;
>   
> -			/* Major versions must be in order: */
> -			if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major)
> -				goto bad;
> +		/* New must be before legacy: */
> +		if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy)
> +			goto bad;
>   
> -			/* Next minor version is good: */
> -			if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor)
> +		/* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */
> +		if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) {
> +			if (!fw_blobs[i - 1].blob.major)
>   				continue;
>   
> -			/* Minor versions must be in order: */
> -			if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor)
> -				goto bad;
> -
> -			/* Patch versions must be in order: */
> -			if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch)
> +			if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major)
>   				continue;
> +		}
> +
> +		/* Major versions must be in order: */
> +		if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major)
> +			goto bad;
> +
> +		/* Next minor version is good: */
> +		if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor)
> +			continue;
> +
> +		/* Minor versions must be in order: */
> +		if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor)
> +			goto bad;
> +
> +		/* Patch versions must be in order: */
> +		if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch)
> +			continue;
>   
>   bad:
> -			drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n",
> -				intel_uc_fw_type_repr(uc_fw->type),
> -				intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev,
> -				fw_blobs[i - 1].blob.legacy ? "L" : "v",
> -				fw_blobs[i - 1].blob.major,
> -				fw_blobs[i - 1].blob.minor,
> -				fw_blobs[i - 1].blob.patch,
> -				intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev,
> -				fw_blobs[i].blob.legacy ? "L" : "v",
> -				fw_blobs[i].blob.major,
> -				fw_blobs[i].blob.minor,
> -				fw_blobs[i].blob.patch);
> -
> -			uc_fw->file_selected.path = NULL;
> -		}
> +		drm_err(&i915->drm, "Invalid %s blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n",
> +			intel_uc_fw_type_repr(type),
> +			intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev,
> +			fw_blobs[i - 1].blob.legacy ? "L" : "v",
> +			fw_blobs[i - 1].blob.major,
> +			fw_blobs[i - 1].blob.minor,
> +			fw_blobs[i - 1].blob.patch,
> +			intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev,
> +			fw_blobs[i].blob.legacy ? "L" : "v",
> +			fw_blobs[i].blob.major,
> +			fw_blobs[i].blob.minor,
> +			fw_blobs[i].blob.patch);
> +		return false;
>   	}
> +
> +	return true;
>   }
>   
>   static const char *__override_guc_firmware_path(struct drm_i915_private *i915)
> @@ -430,7 +452,8 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc
>   void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
>   			    enum intel_uc_fw_type type)
>   {
> -	struct drm_i915_private *i915 = ____uc_fw_to_gt(uc_fw, type)->i915;
> +	struct intel_gt *gt = ____uc_fw_to_gt(uc_fw, type);
> +	struct drm_i915_private *i915 = gt->i915;
>   
>   	/*
>   	 * we use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
> @@ -443,6 +466,12 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
>   	uc_fw->type = type;
>   
>   	if (HAS_GT_UC(i915)) {
> +		if (!validate_fw_table_type(i915, type)) {
> +			gt->uc.fw_table_invalid = true;
> +			intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_NOT_SUPPORTED);
> +			return;
> +		}
> +
>   		__uc_fw_auto_select(i915, uc_fw);
>   		__uc_fw_user_override(i915, uc_fw);
>   	}


  reply	other threads:[~2023-05-05 16:58 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-02 23:40 [PATCH v3 0/6] Improvements to uc firmare management John.C.Harrison
2023-05-02 23:40 ` [Intel-gfx] " John.C.Harrison
2023-05-02 23:40 ` [PATCH v3 1/6] drm/i915/guc: Decode another GuC load failure case John.C.Harrison
2023-05-02 23:40   ` [Intel-gfx] " John.C.Harrison
2023-05-02 23:40 ` [PATCH v3 2/6] drm/i915/guc: Print status register when waiting for GuC to load John.C.Harrison
2023-05-02 23:40   ` [Intel-gfx] " John.C.Harrison
2023-05-02 23:40 ` [PATCH v3 3/6] drm/i915/uc: Track patch level versions on reduced version firmware files John.C.Harrison
2023-05-02 23:40   ` [Intel-gfx] " John.C.Harrison
2023-05-02 23:40 ` [PATCH v3 4/6] drm/i915/uc: Enhancements to firmware table validation John.C.Harrison
2023-05-02 23:40   ` [Intel-gfx] " John.C.Harrison
2023-05-05 16:58   ` Ceraolo Spurio, Daniele [this message]
2023-05-02 23:40 ` [PATCH v3 5/6] drm/i915/uc: Reject duplicate entries in firmware table John.C.Harrison
2023-05-02 23:40   ` [Intel-gfx] " John.C.Harrison
2023-05-02 23:40 ` [PATCH v3 6/6] drm/i915/uc: Make unexpected firmware versions an error in debug builds John.C.Harrison
2023-05-02 23:40   ` [Intel-gfx] " John.C.Harrison
2023-05-03  0:06 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Improvements to uc firmare management (rev5) Patchwork
2023-05-03  0:06 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2023-05-03  0:16 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2023-05-03  1:33 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2c5bfdab-1adb-b022-ca58-92a90f2a13d5@intel.com \
    --to=daniele.ceraolospurio@intel.com \
    --cc=DRI-Devel@Lists.FreeDesktop.Org \
    --cc=Intel-GFX@Lists.FreeDesktop.Org \
    --cc=John.C.Harrison@Intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.