[PATCH] drm/i915/guc: Check for ct enabled while waiting for response

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
@ 2022-06-16 22:01 ` Zhanjun Dong
  0 siblings, 0 replies; 10+ messages in thread
From: Zhanjun Dong @ 2022-06-16 22:01 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: Zhanjun Dong

We are seeing the error message "No response for request". Some of these
cases happened when a reset/suspend action was triggered while waiting for
a response. In this case, the missing response is not an error; active
requests will be cancelled.

This patch handles this condition and changes the error message into a
debug message.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 ++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index f01325cd1b62..f07a7666b1ad 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -455,6 +455,7 @@ static int ct_write(struct intel_guc_ct *ct,
 
 /**
  * wait_for_ct_request_update - Wait for CT request state update.
+ * @ct:		pointer to CT
  * @req:	pointer to pending request
  * @status:	placeholder for status
  *
@@ -467,9 +468,10 @@ static int ct_write(struct intel_guc_ct *ct,
  * *	0 response received (status is valid)
  * *	-ETIMEDOUT no response within hardcoded timeout
  */
-static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
+static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
 {
 	int err;
+	bool ct_enabled;
 
 	/*
 	 * Fast commands should complete in less than 10us, so sample quickly
@@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
 #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
 #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
 #define done \
-	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
+	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
+	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
 	 GUC_HXG_ORIGIN_GUC)
 	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
 	if (err)
 		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
 #undef done
+	if (!ct_enabled)
+		err = -ECANCELED;
 
 	*status = req->status;
 	return err;
@@ -703,11 +708,15 @@ static int ct_send(struct intel_guc_ct *ct,
 
 	intel_guc_notify(ct_to_guc(ct));
 
-	err = wait_for_ct_request_update(&request, status);
+	err = wait_for_ct_request_update(ct, &request, status);
 	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
 	if (unlikely(err)) {
-		CT_ERROR(ct, "No response for request %#x (fence %u)\n",
-			 action[0], request.fence);
+		if (err == -ECANCELED)
+			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
+				 action[0], request.fence);
+		else
+			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
+				 action[0], request.fence);
 		goto unlink;
 	}
 
@@ -771,8 +780,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 
 	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
 	if (unlikely(ret < 0)) {
-		CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
-			 action[0], ERR_PTR(ret), status);
+		if (ret != -ECANCELED)
+			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
+				 action[0], ERR_PTR(ret), status);
 	} else if (unlikely(ret)) {
 		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
 			 action[0], ret, ret);
-- 
2.36.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
@ 2022-06-16 22:01 ` Zhanjun Dong
  0 siblings, 0 replies; 10+ messages in thread
From: Zhanjun Dong @ 2022-06-16 22:01 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: Zhanjun Dong

We are seeing the error message "No response for request". Some of these
cases happened when a reset/suspend action was triggered while waiting for
a response. In this case, the missing response is not an error; active
requests will be cancelled.

This patch handles this condition and changes the error message into a
debug message.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 ++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index f01325cd1b62..f07a7666b1ad 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -455,6 +455,7 @@ static int ct_write(struct intel_guc_ct *ct,
 
 /**
  * wait_for_ct_request_update - Wait for CT request state update.
+ * @ct:		pointer to CT
  * @req:	pointer to pending request
  * @status:	placeholder for status
  *
@@ -467,9 +468,10 @@ static int ct_write(struct intel_guc_ct *ct,
  * *	0 response received (status is valid)
  * *	-ETIMEDOUT no response within hardcoded timeout
  */
-static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
+static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
 {
 	int err;
+	bool ct_enabled;
 
 	/*
 	 * Fast commands should complete in less than 10us, so sample quickly
@@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
 #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
 #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
 #define done \
-	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
+	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
+	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
 	 GUC_HXG_ORIGIN_GUC)
 	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
 	if (err)
 		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
 #undef done
+	if (!ct_enabled)
+		err = -ECANCELED;
 
 	*status = req->status;
 	return err;
@@ -703,11 +708,15 @@ static int ct_send(struct intel_guc_ct *ct,
 
 	intel_guc_notify(ct_to_guc(ct));
 
-	err = wait_for_ct_request_update(&request, status);
+	err = wait_for_ct_request_update(ct, &request, status);
 	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
 	if (unlikely(err)) {
-		CT_ERROR(ct, "No response for request %#x (fence %u)\n",
-			 action[0], request.fence);
+		if (err == -ECANCELED)
+			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
+				 action[0], request.fence);
+		else
+			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
+				 action[0], request.fence);
 		goto unlink;
 	}
 
@@ -771,8 +780,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
 
 	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
 	if (unlikely(ret < 0)) {
-		CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
-			 action[0], ERR_PTR(ret), status);
+		if (ret != -ECANCELED)
+			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
+				 action[0], ERR_PTR(ret), status);
 	} else if (unlikely(ret)) {
 		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
 			 action[0], ret, ret);
-- 
2.36.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/guc: Check for ct enabled while waiting for response
  2022-06-16 22:01 ` [Intel-gfx] " Zhanjun Dong
  (?)
@ 2022-06-17  1:20 ` Patchwork
  -1 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2022-06-17  1:20 UTC (permalink / raw)
  To: Zhanjun Dong; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 3648 bytes --]

== Series Details ==

Series: drm/i915/guc: Check for ct enabled while waiting for response
URL   : https://patchwork.freedesktop.org/series/105258/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11773 -> Patchwork_105258v1
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/index.html

Participating hosts (35 -> 34)
------------------------------

  Missing    (1): fi-bdw-samus 

Known issues
------------

  Here are the changes found in Patchwork_105258v1 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live@gem:
    - fi-blb-e6850:       NOTRUN -> [DMESG-FAIL][1] ([i915#4528])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/fi-blb-e6850/igt@i915_selftest@live@gem.html
    - fi-pnv-d510:        NOTRUN -> [DMESG-FAIL][2] ([i915#4528])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/fi-pnv-d510/igt@i915_selftest@live@gem.html

  * igt@kms_flip@basic-flip-vs-wf_vblank@a-edp1:
    - fi-tgl-u2:          [PASS][3] -> [DMESG-WARN][4] ([i915#402]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/fi-tgl-u2/igt@kms_flip@basic-flip-vs-wf_vblank@a-edp1.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/fi-tgl-u2/igt@kms_flip@basic-flip-vs-wf_vblank@a-edp1.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@gt_lrc:
    - fi-bsw-n3050:       [DMESG-FAIL][5] ([i915#2373]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/fi-bsw-n3050/igt@i915_selftest@live@gt_lrc.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/fi-bsw-n3050/igt@i915_selftest@live@gt_lrc.html

  * igt@i915_selftest@live@requests:
    - fi-blb-e6850:       [DMESG-FAIL][7] ([i915#4528]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/fi-blb-e6850/igt@i915_selftest@live@requests.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/fi-blb-e6850/igt@i915_selftest@live@requests.html
    - fi-pnv-d510:        [DMESG-FAIL][9] ([i915#4528]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/fi-pnv-d510/igt@i915_selftest@live@requests.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/fi-pnv-d510/igt@i915_selftest@live@requests.html

  * igt@kms_flip@basic-flip-vs-modeset@a-edp1:
    - fi-tgl-u2:          [DMESG-WARN][11] ([i915#402]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/fi-tgl-u2/igt@kms_flip@basic-flip-vs-modeset@a-edp1.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/fi-tgl-u2/igt@kms_flip@basic-flip-vs-modeset@a-edp1.html

  
  [i915#2373]: https://gitlab.freedesktop.org/drm/intel/issues/2373
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#4528]: https://gitlab.freedesktop.org/drm/intel/issues/4528


Build changes
-------------

  * Linux: CI_DRM_11773 -> Patchwork_105258v1

  CI-20190529: 20190529
  CI_DRM_11773: 8025a295b7aa707f64c7984b7781c6f25e22a901 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6533: 6b5107d91827962808441db6b98e478aa9e67bdb @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_105258v1: 8025a295b7aa707f64c7984b7781c6f25e22a901 @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

746f27f6bf8f drm/i915/guc: Check for ct enabled while waiting for response

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/index.html

[-- Attachment #2: Type: text/html, Size: 4684 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
  2022-06-16 22:01 ` [Intel-gfx] " Zhanjun Dong
@ 2022-06-17  4:42   ` Dixit, Ashutosh
  -1 siblings, 0 replies; 10+ messages in thread
From: Dixit, Ashutosh @ 2022-06-17  4:42 UTC (permalink / raw)
  To: Zhanjun Dong
  Cc: intel-gfx, Daniele Ceraolo Spurio, John Harrison, dri-devel,
	Michal Wajdeczko

On Thu, 16 Jun 2022 15:01:59 -0700, Zhanjun Dong wrote:
>
> We are seeing error message of "No response for request". Some cases
> happened while waiting for response and reset/suspend action was triggered.
> In this case, no response is not an error, active requests will be
> cancelled.
>
> This patch will handle this condition and change the error message into
> debug message.

The convention we follow in drm is to record the version of the patch and
what changed in that version.

Generally I am ok with this version of the patch but still have a couple of
questions.

> -static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
> +static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
>  {
>	int err;
> +	bool ct_enabled;
>
>	/*
>	 * Fast commands should complete in less than 10us, so sample quickly
> @@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
>  #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
>  #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
>  #define done \
> -	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
> +	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
> +	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
>	 GUC_HXG_ORIGIN_GUC)
>	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
>	if (err)
>		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
>  #undef done
> +	if (!ct_enabled)
> +		err = -ECANCELED;

So we have the choice of either setting the request status here as I was
suggesting earlier, e.g. as follows:

	#define   GUC_HXG_TYPE_REQUEST_CANCELED        4u // unused value

	if (!ct_enabled)
		req->status = GUC_HXG_TYPE_REQUEST_CANCELED;

We would return 0 in this case and would check for the req->status value
above where needed.

Or we can return -ECANCELED. I don't know if -ECANCELED is the right value
to return but whatever we return will have to be unique (unused elsewhere)
since we are relying on the return value. -ECANCELED is unique so that part
is ok.

Do other reviewers have a preference whether we should set req->status or
return a unique return value?

>	*status = req->status;
>	return err;
> @@ -703,11 +708,15 @@ static int ct_send(struct intel_guc_ct *ct,
>
>	intel_guc_notify(ct_to_guc(ct));
>
> -	err = wait_for_ct_request_update(&request, status);
> +	err = wait_for_ct_request_update(ct, &request, status);
>	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
>	if (unlikely(err)) {
> -		CT_ERROR(ct, "No response for request %#x (fence %u)\n",
> -			 action[0], request.fence);
> +		if (err == -ECANCELED)
> +			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
> +				 action[0], request.fence);
> +		else
> +			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
> +				 action[0], request.fence);
>		goto unlink;
>	}
>
> @@ -771,8 +780,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
>
>	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
>	if (unlikely(ret < 0)) {
> -		CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
> -			 action[0], ERR_PTR(ret), status);
> +		if (ret != -ECANCELED)
> +			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
> +				 action[0], ERR_PTR(ret), status);

I am wondering why we even have this print, and whether we should just delete
it or convert it to CT_DEBUG(). The reason is that only error prints closest to
where the actual error occurs are useful since they pin-point the error
clearly. This seems to me to be a "second" print from a higher-level
function, which does not seem particularly useful.


>	} else if (unlikely(ret)) {
>		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
>			 action[0], ret, ret);
> --
> 2.36.0
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
@ 2022-06-17  4:42   ` Dixit, Ashutosh
  0 siblings, 0 replies; 10+ messages in thread
From: Dixit, Ashutosh @ 2022-06-17  4:42 UTC (permalink / raw)
  To: Zhanjun Dong; +Cc: intel-gfx, dri-devel

On Thu, 16 Jun 2022 15:01:59 -0700, Zhanjun Dong wrote:
>
> We are seeing error message of "No response for request". Some cases
> happened while waiting for response and reset/suspend action was triggered.
> In this case, no response is not an error, active requests will be
> cancelled.
>
> This patch will handle this condition and change the error message into
> debug message.

The convention we follow in drm is to record the version of the patch and
what changed in that version.

Generally I am ok with this version of the patch but still have a couple of
questions.

> -static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
> +static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
>  {
>	int err;
> +	bool ct_enabled;
>
>	/*
>	 * Fast commands should complete in less than 10us, so sample quickly
> @@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
>  #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
>  #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
>  #define done \
> -	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
> +	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
> +	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
>	 GUC_HXG_ORIGIN_GUC)
>	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
>	if (err)
>		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
>  #undef done
> +	if (!ct_enabled)
> +		err = -ECANCELED;

So we have the choice of either setting the request status here as I was
suggesting earlier, e.g. as follows:

	#define   GUC_HXG_TYPE_REQUEST_CANCELED        4u // unused value

	if (!ct_enabled)
		req->status = GUC_HXG_TYPE_REQUEST_CANCELED;

We would return 0 in this case and would check for the req->status value
above where needed.

Or we can return -ECANCELED. I don't know if -ECANCELED is the right value
to return but whatever we return will have to be unique (unused elsewhere)
since we are relying on the return value. -ECANCELED is unique so that part
is ok.

Do other reviewers have a preference whether we should set req->status or
return a unique return value?

>	*status = req->status;
>	return err;
> @@ -703,11 +708,15 @@ static int ct_send(struct intel_guc_ct *ct,
>
>	intel_guc_notify(ct_to_guc(ct));
>
> -	err = wait_for_ct_request_update(&request, status);
> +	err = wait_for_ct_request_update(ct, &request, status);
>	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
>	if (unlikely(err)) {
> -		CT_ERROR(ct, "No response for request %#x (fence %u)\n",
> -			 action[0], request.fence);
> +		if (err == -ECANCELED)
> +			CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
> +				 action[0], request.fence);
> +		else
> +			CT_ERROR(ct, "No response for request %#x (fence %u)\n",
> +				 action[0], request.fence);
>		goto unlink;
>	}
>
> @@ -771,8 +780,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
>
>	ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
>	if (unlikely(ret < 0)) {
> -		CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
> -			 action[0], ERR_PTR(ret), status);
> +		if (ret != -ECANCELED)
> +			CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
> +				 action[0], ERR_PTR(ret), status);

I am wondering why we even have this print, and whether we should just delete
it or convert it to CT_DEBUG(). The reason is that only error prints closest to
where the actual error occurs are useful since they pin-point the error
clearly. This seems to me to be a "second" print from a higher-level
function, which does not seem particularly useful.


>	} else if (unlikely(ret)) {
>		CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
>			 action[0], ret, ret);
> --
> 2.36.0
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
  2022-06-16 22:01 ` [Intel-gfx] " Zhanjun Dong
                   ` (2 preceding siblings ...)
  (?)
@ 2022-06-17  4:50 ` Dixit, Ashutosh
  2022-07-12 19:47   ` Dixit, Ashutosh
  -1 siblings, 1 reply; 10+ messages in thread
From: Dixit, Ashutosh @ 2022-06-17  4:50 UTC (permalink / raw)
  To: Zhanjun Dong; +Cc: intel-gfx, dri-devel

On Thu, 16 Jun 2022 15:01:59 -0700, Zhanjun Dong wrote:
>
> We are seeing error message of "No response for request". Some cases
> happened while waiting for response and reset/suspend action was triggered.
> In this case, no response is not an error, active requests will be
> cancelled.
>
> This patch will handle this condition and change the error message into
> debug message.
>
> Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 ++++++++++++++++-------
>  1 file changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index f01325cd1b62..f07a7666b1ad 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -455,6 +455,7 @@ static int ct_write(struct intel_guc_ct *ct,
>
>  /**
>   * wait_for_ct_request_update - Wait for CT request state update.
> + * @ct:		pointer to CT
>   * @req:	pointer to pending request
>   * @status:	placeholder for status
>   *
> @@ -467,9 +468,10 @@ static int ct_write(struct intel_guc_ct *ct,
>   * *	0 response received (status is valid)
>   * *	-ETIMEDOUT no response within hardcoded timeout
>   */
> -static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
> +static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
>  {
>	int err;
> +	bool ct_enabled;
>
>	/*
>	 * Fast commands should complete in less than 10us, so sample quickly
> @@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
>  #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
>  #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
>  #define done \
> -	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
> +	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
> +	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
>	 GUC_HXG_ORIGIN_GUC)
>	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
>	if (err)
>		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
>  #undef done
> +	if (!ct_enabled)
> +		err = -ECANCELED;

Actually here's an even simpler suggestion. We could just do:

	if (!ct_enabled)
		CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n", ...);

And return 0 as before. This way we won't have to make any changes in
either ct_send() or intel_guc_ct_send(). So intel_guc_ct_enabled() just
serves to get us out of the wait early and prevent the -ETIMEDOUT return
(and returning 0 avoids all the error messages we are trying to eliminate).

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915/guc: Check for ct enabled while waiting for response
  2022-06-16 22:01 ` [Intel-gfx] " Zhanjun Dong
                   ` (3 preceding siblings ...)
  (?)
@ 2022-06-17 11:35 ` Patchwork
  -1 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2022-06-17 11:35 UTC (permalink / raw)
  To: Zhanjun Dong; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 15883 bytes --]

== Series Details ==

Series: drm/i915/guc: Check for ct enabled while waiting for response
URL   : https://patchwork.freedesktop.org/series/105258/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11773_full -> Patchwork_105258v1_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts

Known issues
------------

  Here are the changes found in Patchwork_105258v1_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@in-flight-10ms:
    - shard-tglb:         [PASS][1] -> [TIMEOUT][2] ([i915#3063]) +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-tglb3/igt@gem_eio@in-flight-10ms.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-tglb8/igt@gem_eio@in-flight-10ms.html

  * igt@gem_eio@in-flight-contexts-1us:
    - shard-snb:          [PASS][3] -> [FAIL][4] ([i915#4409])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-snb5/igt@gem_eio@in-flight-contexts-1us.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-snb5/igt@gem_eio@in-flight-contexts-1us.html

  * igt@gem_exec_balancer@parallel-bb-first:
    - shard-iclb:         [PASS][5] -> [SKIP][6] ([i915#4525])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb2/igt@gem_exec_balancer@parallel-bb-first.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb5/igt@gem_exec_balancer@parallel-bb-first.html

  * igt@gem_exec_fair@basic-flow@rcs0:
    - shard-tglb:         [PASS][7] -> [FAIL][8] ([i915#2842]) +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-tglb3/igt@gem_exec_fair@basic-flow@rcs0.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-tglb6/igt@gem_exec_fair@basic-flow@rcs0.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
    - shard-iclb:         [PASS][9] -> [FAIL][10] ([i915#2842])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb8/igt@gem_exec_fair@basic-none-share@rcs0.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb5/igt@gem_exec_fair@basic-none-share@rcs0.html

  * igt@gem_exec_fair@basic-none@vecs0:
    - shard-apl:          [PASS][11] -> [FAIL][12] ([i915#2842])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-apl8/igt@gem_exec_fair@basic-none@vecs0.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-apl8/igt@gem_exec_fair@basic-none@vecs0.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-kbl:          [PASS][13] -> [FAIL][14] ([i915#2842]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-kbl3/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-kbl4/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_whisper@basic-queues-all:
    - shard-glk:          [PASS][15] -> [DMESG-WARN][16] ([i915#118])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-glk2/igt@gem_exec_whisper@basic-queues-all.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-glk5/igt@gem_exec_whisper@basic-queues-all.html

  * igt@i915_suspend@fence-restore-tiled2untiled:
    - shard-apl:          [PASS][17] -> [DMESG-WARN][18] ([i915#180])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-apl7/igt@i915_suspend@fence-restore-tiled2untiled.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-apl1/igt@i915_suspend@fence-restore-tiled2untiled.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [PASS][19] -> [DMESG-WARN][20] ([i915#180]) +2 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-kbl4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-wc:
    - shard-apl:          NOTRUN -> [SKIP][21] ([fdo#109271]) +12 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-apl6/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-wc.html

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-d-frame-sequence:
    - shard-apl:          NOTRUN -> [SKIP][22] ([fdo#109271] / [i915#533])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-apl6/igt@kms_pipe_crc_basic@nonblocking-crc-pipe-d-frame-sequence.html

  * igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-a-edp-1:
    - shard-iclb:         [PASS][23] -> [SKIP][24] ([i915#5235]) +2 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb4/igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-a-edp-1.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb2/igt@kms_plane_scaling@planes-upscale-factor-0-25-downscale-factor-0-5@pipe-a-edp-1.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-iclb:         [PASS][25] -> [SKIP][26] ([fdo#109441]) +1 similar issue
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb2/igt@kms_psr@psr2_no_drrs.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb5/igt@kms_psr@psr2_no_drrs.html

  * igt@kms_psr_stress_test@invalidate-primary-flip-overlay:
    - shard-tglb:         [PASS][27] -> [SKIP][28] ([i915#5519])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-tglb6/igt@kms_psr_stress_test@invalidate-primary-flip-overlay.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-tglb5/igt@kms_psr_stress_test@invalidate-primary-flip-overlay.html

  * igt@sw_sync@sync_multi_timeline_wait:
    - shard-apl:          NOTRUN -> [FAIL][29] ([i915#6140])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-apl6/igt@sw_sync@sync_multi_timeline_wait.html

  
#### Possible fixes ####

  * igt@gem_exec_balancer@parallel:
    - shard-iclb:         [SKIP][30] ([i915#4525]) -> [PASS][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb6/igt@gem_exec_balancer@parallel.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb4/igt@gem_exec_balancer@parallel.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - shard-kbl:          [FAIL][32] ([i915#2842]) -> [PASS][33] +3 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-kbl3/igt@gem_exec_fair@basic-none@vcs0.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-kbl1/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@gem_huc_copy@huc-copy:
    - shard-tglb:         [SKIP][34] ([i915#2190]) -> [PASS][35]
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-tglb6/igt@gem_huc_copy@huc-copy.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-tglb3/igt@gem_huc_copy@huc-copy.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][36] ([i915#454]) -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb6/igt@i915_pm_dc@dc6-psr.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb4/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_dc@dc9-dpms:
    - shard-apl:          [FAIL][38] ([i915#4275]) -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-apl4/igt@i915_pm_dc@dc9-dpms.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-apl3/igt@i915_pm_dc@dc9-dpms.html

  * igt@kms_cursor_legacy@cursor-vs-flip-legacy:
    - shard-iclb:         [DMESG-WARN][40] ([i915#1888]) -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb1/igt@kms_cursor_legacy@cursor-vs-flip-legacy.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb6/igt@kms_cursor_legacy@cursor-vs-flip-legacy.html

  * igt@kms_cursor_legacy@cursor-vs-flip-toggle:
    - shard-iclb:         [FAIL][42] ([i915#5072]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb7/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb8/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size:
    - shard-glk:          [FAIL][44] ([i915#2346] / [i915#533]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-glk4/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-glk1/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling:
    - shard-glk:          [FAIL][46] ([i915#4911]) -> [PASS][47]
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-glk8/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-glk4/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytileccs-upscaling.html

  * igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-16bpp-ytile-downscaling:
    - shard-iclb:         [SKIP][48] ([i915#3701]) -> [PASS][49] +2 similar issues
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb2/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-16bpp-ytile-downscaling.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb1/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-16bpp-ytile-downscaling.html

  * igt@kms_hdr@bpc-switch-dpms@pipe-a-dp-1:
    - shard-kbl:          [FAIL][50] ([i915#1188]) -> [PASS][51]
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-kbl4/igt@kms_hdr@bpc-switch-dpms@pipe-a-dp-1.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-kbl6/igt@kms_hdr@bpc-switch-dpms@pipe-a-dp-1.html

  * igt@kms_hdr@bpc-switch-suspend@pipe-a-dp-1:
    - shard-apl:          [DMESG-WARN][52] ([i915#180]) -> [PASS][53] +3 similar issues
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-apl3/igt@kms_hdr@bpc-switch-suspend@pipe-a-dp-1.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-apl8/igt@kms_hdr@bpc-switch-suspend@pipe-a-dp-1.html

  * igt@kms_plane_scaling@planes-downscale-factor-0-5@pipe-a-edp-1:
    - shard-iclb:         [SKIP][54] ([i915#5235]) -> [PASS][55] +2 similar issues
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb2/igt@kms_plane_scaling@planes-downscale-factor-0-5@pipe-a-edp-1.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb1/igt@kms_plane_scaling@planes-downscale-factor-0-5@pipe-a-edp-1.html

  * igt@kms_psr@psr2_sprite_mmap_gtt:
    - shard-iclb:         [SKIP][56] ([fdo#109441]) -> [PASS][57] +2 similar issues
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb4/igt@kms_psr@psr2_sprite_mmap_gtt.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb2/igt@kms_psr@psr2_sprite_mmap_gtt.html

  * igt@perf@polling-parameterized:
    - shard-tglb:         [FAIL][58] ([i915#5639]) -> [PASS][59]
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-tglb8/igt@perf@polling-parameterized.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-tglb8/igt@perf@polling-parameterized.html

  
#### Warnings ####

  * igt@gem_exec_balancer@parallel-ordering:
    - shard-iclb:         [FAIL][60] ([i915#6117]) -> [SKIP][61] ([i915#4525])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb4/igt@gem_exec_balancer@parallel-ordering.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb3/igt@gem_exec_balancer@parallel-ordering.html

  * igt@kms_psr2_sf@cursor-plane-move-continuous-exceed-sf:
    - shard-iclb:         [SKIP][62] ([i915#658]) -> [SKIP][63] ([i915#2920])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb4/igt@kms_psr2_sf@cursor-plane-move-continuous-exceed-sf.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb2/igt@kms_psr2_sf@cursor-plane-move-continuous-exceed-sf.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area:
    - shard-iclb:         [SKIP][64] ([i915#2920]) -> [SKIP][65] ([fdo#111068] / [i915#658])
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb2/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb1/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-big-fb:
    - shard-iclb:         [SKIP][66] ([i915#2920]) -> [SKIP][67] ([i915#658]) +1 similar issue
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11773/shard-iclb2/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-big-fb.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/shard-iclb1/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-big-fb.html

  
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2346]: https://gitlab.freedesktop.org/drm/intel/issues/2346
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#3063]: https://gitlab.freedesktop.org/drm/intel/issues/3063
  [i915#3701]: https://gitlab.freedesktop.org/drm/intel/issues/3701
  [i915#4275]: https://gitlab.freedesktop.org/drm/intel/issues/4275
  [i915#4409]: https://gitlab.freedesktop.org/drm/intel/issues/4409
  [i915#4525]: https://gitlab.freedesktop.org/drm/intel/issues/4525
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#4911]: https://gitlab.freedesktop.org/drm/intel/issues/4911
  [i915#5072]: https://gitlab.freedesktop.org/drm/intel/issues/5072
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#5519]: https://gitlab.freedesktop.org/drm/intel/issues/5519
  [i915#5639]: https://gitlab.freedesktop.org/drm/intel/issues/5639
  [i915#6117]: https://gitlab.freedesktop.org/drm/intel/issues/6117
  [i915#6140]: https://gitlab.freedesktop.org/drm/intel/issues/6140
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658


Build changes
-------------

  * Linux: CI_DRM_11773 -> Patchwork_105258v1

  CI-20190529: 20190529
  CI_DRM_11773: 8025a295b7aa707f64c7984b7781c6f25e22a901 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6533: 6b5107d91827962808441db6b98e478aa9e67bdb @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_105258v1: 8025a295b7aa707f64c7984b7781c6f25e22a901 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_105258v1/index.html

[-- Attachment #2: Type: text/html, Size: 18417 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
  2022-06-17  4:50 ` Dixit, Ashutosh
@ 2022-07-12 19:47   ` Dixit, Ashutosh
  2022-07-13 21:45       ` Dong, Zhanjun
  0 siblings, 1 reply; 10+ messages in thread
From: Dixit, Ashutosh @ 2022-07-12 19:47 UTC (permalink / raw)
  To: Zhanjun Dong; +Cc: intel-gfx, dri-devel

On Thu, 16 Jun 2022 21:50:55 -0700, Dixit, Ashutosh wrote:
>
> On Thu, 16 Jun 2022 15:01:59 -0700, Zhanjun Dong wrote:
> >
> > We are seeing error message of "No response for request". Some cases
> > happened while waiting for response and reset/suspend action was triggered.
> > In this case, no response is not an error, active requests will be
> > cancelled.
> >
> > This patch will handle this condition and change the error message into
> > debug message.
> >
> > Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 ++++++++++++++++-------
> >  1 file changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > index f01325cd1b62..f07a7666b1ad 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > @@ -455,6 +455,7 @@ static int ct_write(struct intel_guc_ct *ct,
> >
> >  /**
> >   * wait_for_ct_request_update - Wait for CT request state update.
> > + * @ct:		pointer to CT
> >   * @req:	pointer to pending request
> >   * @status:	placeholder for status
> >   *
> > @@ -467,9 +468,10 @@ static int ct_write(struct intel_guc_ct *ct,
> >   * *	0 response received (status is valid)
> >   * *	-ETIMEDOUT no response within hardcoded timeout
> >   */
> > -static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
> > +static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
> >  {
> >	int err;
> > +	bool ct_enabled;
> >
> >	/*
> >	 * Fast commands should complete in less than 10us, so sample quickly
> > @@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
> >  #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
> >  #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
> >  #define done \
> > -	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
> > +	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
> > +	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
> >	 GUC_HXG_ORIGIN_GUC)
> >	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
> >	if (err)
> >		err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
> >  #undef done
> > +	if (!ct_enabled)
> > +		err = -ECANCELED;
>
> Actually here's an even simpler suggestion. We could just do:
>
>	if (!ct_enabled)
>		CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n", ...);
>
> And return 0 as before. This way we won't have to make any changes in
> either ct_send() or intel_guc_ct_send(). So intel_guc_ct_enabled() just
> serves to get us out of the wait early and prevent the -ETIMEDOUT return
> (and 0 return avoids all the error messages we are trying to eliminate).

Actually, we will need to unlink the request too, so it will be something like:

	if (!ct_enabled) {
		CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n", ...);

		spin_lock_irqsave(&ct->requests.lock, flags);
		list_del(&request.link);
		spin_unlock_irqrestore(&ct->requests.lock, flags);
	}

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
  2022-07-12 19:47   ` Dixit, Ashutosh
@ 2022-07-13 21:45       ` Dong, Zhanjun
  0 siblings, 0 replies; 10+ messages in thread
From: Dong, Zhanjun @ 2022-07-13 21:45 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: intel-gfx, dri-devel



> -----Original Message-----
> From: Dixit, Ashutosh <ashutosh.dixit@intel.com>
> Sent: July 12, 2022 3:48 PM
> To: Dong, Zhanjun <zhanjun.dong@intel.com>
> Cc: intel-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while
> waiting for response
> 
> On Thu, 16 Jun 2022 21:50:55 -0700, Dixit, Ashutosh wrote:
> >
> > On Thu, 16 Jun 2022 15:01:59 -0700, Zhanjun Dong wrote:
> > >
> > > We are seeing error message of "No response for request". Some cases
> > > happened while waiting for response and reset/suspend action was
> triggered.
> > > In this case, no response is not an error, active requests will be
> > > cancelled.
> > >
> > > This patch will handle this condition and change the error message
> > > into debug message.
> > >
> > > Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24
> > > ++++++++++++++++-------
> > >  1 file changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > index f01325cd1b62..f07a7666b1ad 100644
> > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > @@ -455,6 +455,7 @@ static int ct_write(struct intel_guc_ct *ct,
> > >
> > >  /**
> > >   * wait_for_ct_request_update - Wait for CT request state update.
> > > + * @ct:		pointer to CT
> > >   * @req:	pointer to pending request
> > >   * @status:	placeholder for status
> > >   *
> > > @@ -467,9 +468,10 @@ static int ct_write(struct intel_guc_ct *ct,
> > >   * *	0 response received (status is valid)
> > >   * *	-ETIMEDOUT no response within hardcoded timeout
> > >   */
> > > -static int wait_for_ct_request_update(struct ct_request *req, u32
> > > *status)
> > > +static int wait_for_ct_request_update(struct intel_guc_ct *ct,
> > > +struct ct_request *req, u32 *status)
> > >  {
> > >	int err;
> > > +	bool ct_enabled;
> > >
> > >	/*
> > >	 * Fast commands should complete in less than 10us, so sample
> > >quickly  @@ -481,12 +483,15 @@ static int
> > >wait_for_ct_request_update(struct ct_request *req, u32 *status)
> > >  #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
> > >  #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
> > >  #define done \
> > > -	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) ==
> \
> > > +	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
> > > +	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) ==
> \
> > >	 GUC_HXG_ORIGIN_GUC)
> > >	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
> > >	if (err)
> > >		err = wait_for(done,
> GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
> > >  #undef done
> > > +	if (!ct_enabled)
> > > +		err = -ECANCELED;
> >
> > Actually here's an even simpler suggestion. We could just do:
> >
> >	if (!ct_enabled)
> >		CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is
> disabled\n",
> >...);
> >
> > And return 0 as before. This way we won't have to make any changes in
> > either ct_send() or intel_guc_ct_send(). So intel_guc_ct_enabled()
> > just serves to get us out of the wait early and prevent the -ETIMEDOUT
> > return (and 0 return avoids all the error messages we are trying to
> eliminate).
> 
> Actually will need to unlink the request too, so it will be something like:
> 
> 	if (!ct_enabled) {
> 		CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is
> disabled\n", ...);
> 
> 		spin_lock_irqsave(&ct->requests.lock, flags);
> 		list_del(&request.link);
> 		spin_unlock_irqrestore(&ct->requests.lock, flags);
> 	}

I agree: the caller needs err to be non-zero to know that the request did not succeed and that it must unlink the request.
The caller, ct_send(), will do the unlink.

As for the error code ECANCELED: intel_guc_ct_send() returns ENODEV if the CT is disabled, so this patch will be changed to use ENODEV to match it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while waiting for response
@ 2022-07-13 21:45       ` Dong, Zhanjun
  0 siblings, 0 replies; 10+ messages in thread
From: Dong, Zhanjun @ 2022-07-13 21:45 UTC (permalink / raw)
  To: Dixit, Ashutosh; +Cc: intel-gfx, dri-devel



> -----Original Message-----
> From: Dixit, Ashutosh <ashutosh.dixit@intel.com>
> Sent: July 12, 2022 3:48 PM
> To: Dong, Zhanjun <zhanjun.dong@intel.com>
> Cc: intel-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915/guc: Check for ct enabled while
> waiting for response
> 
> On Thu, 16 Jun 2022 21:50:55 -0700, Dixit, Ashutosh wrote:
> >
> > On Thu, 16 Jun 2022 15:01:59 -0700, Zhanjun Dong wrote:
> > >
> > > We are seeing error message of "No response for request". Some cases
> > > happened while waiting for response and reset/suspend action was
> triggered.
> > > In this case, no response is not an error, active requests will be
> > > cancelled.
> > >
> > > This patch will handle this condition and change the error message
> > > into debug message.
> > >
> > > Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24
> > > ++++++++++++++++-------
> > >  1 file changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > index f01325cd1b62..f07a7666b1ad 100644
> > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > > @@ -455,6 +455,7 @@ static int ct_write(struct intel_guc_ct *ct,
> > >
> > >  /**
> > >   * wait_for_ct_request_update - Wait for CT request state update.
> > > + * @ct:		pointer to CT
> > >   * @req:	pointer to pending request
> > >   * @status:	placeholder for status
> > >   *
> > > @@ -467,9 +468,10 @@ static int ct_write(struct intel_guc_ct *ct,
> > >   * *	0 response received (status is valid)
> > >   * *	-ETIMEDOUT no response within hardcoded timeout
> > >   */
> > > -static int wait_for_ct_request_update(struct ct_request *req, u32
> > > *status)
> > > +static int wait_for_ct_request_update(struct intel_guc_ct *ct,
> > > +struct ct_request *req, u32 *status)
> > >  {
> > >	int err;
> > > +	bool ct_enabled;
> > >
> > >	/*
> > >	 * Fast commands should complete in less than 10us, so sample
> > >quickly  @@ -481,12 +483,15 @@ static int
> > >wait_for_ct_request_update(struct ct_request *req, u32 *status)
> > >  #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
> > >  #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
> > >  #define done \
> > > -	(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) ==
> \
> > > +	(!(ct_enabled = intel_guc_ct_enabled(ct)) || \
> > > +	 FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) ==
> \
> > >	 GUC_HXG_ORIGIN_GUC)
> > >	err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
> > >	if (err)
> > >		err = wait_for(done,
> GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
> > >  #undef done
> > > +	if (!ct_enabled)
> > > +		err = -ECANCELED;
> >
> > Actually here's an even simpler suggestion. We could just do:
> >
> >	if (!ct_enabled)
> >		CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is
> disabled\n",
> >...);
> >
> > And return 0 as before. This way we won't have to make any changes in
> > either ct_send() or intel_guc_ct_send(). So intel_guc_ct_enabled()
> > just serves to get us out of the wait early and prevent the -ETIMEDOUT
> > return (and 0 return avoids all the error messages we are trying to
> eliminate).
> 
> Actually will need to unlink the request too, so it will be something like:
> 
> 	if (!ct_enabled) {
> 		CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is
> disabled\n", ...);
> 
> 		spin_lock_irqsave(&ct->requests.lock, flags);
> 		list_del(&request.link);
> 		spin_unlock_irqrestore(&ct->requests.lock, flags);
> 	}

I agree: the caller needs err to be non-zero to know that the request did not succeed and that it must unlink the request.
The caller, ct_send(), will do the unlink.

As for the error code ECANCELED: intel_guc_ct_send() returns ENODEV if the CT is disabled, so this patch will be changed to use ENODEV to match it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-07-13 21:45 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-16 22:01 [PATCH] drm/i915/guc: Check for ct enabled while waiting for response Zhanjun Dong
2022-06-16 22:01 ` [Intel-gfx] " Zhanjun Dong
2022-06-17  1:20 ` [Intel-gfx] ✓ Fi.CI.BAT: success for " Patchwork
2022-06-17  4:42 ` [Intel-gfx] [PATCH] " Dixit, Ashutosh
2022-06-17  4:42   ` Dixit, Ashutosh
2022-06-17  4:50 ` Dixit, Ashutosh
2022-07-12 19:47   ` Dixit, Ashutosh
2022-07-13 21:45     ` Dong, Zhanjun
2022-07-13 21:45       ` Dong, Zhanjun
2022-06-17 11:35 ` [Intel-gfx] ✓ Fi.CI.IGT: success for " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.