All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2)
@ 2020-02-27 11:38 Huang Rui
  2020-02-27 11:47 ` Christian König
  0 siblings, 1 reply; 5+ messages in thread
From: Huang Rui @ 2020-02-27 11:38 UTC (permalink / raw)
  To: amd-gfx
  Cc: Pierre-Eric Pelloux-Prayer, Huang Rui, Aaron Liu, Luben Tuikov,
	Alex Deucher, Christian König

Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
This causes hangs on some hardware (eg: Raven1). This patch restores the
unconditionnal frame control packets issuing, that's to keep the per-IB logic
regarding the secure flag.

Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)

v1 -> v2:
As suggested by Luben, and accept part of implemetation from this patch:
- Put "secure" closed to the loop and use optimization
- Change "secure" to bool again, and move "secure == -1" out of loop.

Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 29 +++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++++------
 4 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4b2342d..0f4909a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 
+	/* Setup initial TMZiness and send it off.
+	 */
 	secure = false;
+	if (job && ring->funcs->emit_frame_cntl) {
+		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;
+		amdgpu_ring_emit_frame_cntl(ring, true, secure);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
@@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
 			continue;
 
-		/* If this IB is TMZ, add frame TMZ start packet,
-		 * else, turn off TMZ.
-		 */
-		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
-			if (!secure) {
-				secure = true;
-				amdgpu_ring_emit_tmz(ring, true);
+		if (job && ring->funcs->emit_frame_cntl) {
+			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+				amdgpu_ring_emit_frame_cntl(ring, false, secure);
+				secure = !secure;
+				amdgpu_ring_emit_frame_cntl(ring, true, secure);
 			}
-		} else if (secure) {
-			secure = false;
-			amdgpu_ring_emit_tmz(ring, false);
 		}
 
 		amdgpu_ring_emit_ib(ring, job, ib, status);
 		status &= ~AMDGPU_HAVE_CTX_SWITCH;
 	}
 
-	if (secure) {
-		secure = false;
-		amdgpu_ring_emit_tmz(ring, false);
-	}
+	if (job && ring->funcs->emit_frame_cntl)
+		amdgpu_ring_emit_frame_cntl(ring, false,
+					    secure ? true : false);
 
 #ifdef CONFIG_X86_64
 	if (!(adev->flags & AMD_IS_APU))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 24caff0..4d019d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
 					uint32_t reg0, uint32_t reg1,
 					uint32_t ref, uint32_t mask);
-	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+				bool secure);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
 			      enum drm_sched_priority priority);
@@ -247,7 +248,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7b61583..748ac35 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 					   sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				    bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
@@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v10_0_ring_preempt_ib,
-	.emit_tmz = gfx_v10_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1c7a16b..fbde712 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
 }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				   bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-	.emit_tmz = gfx_v9_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2)
  2020-02-27 11:38 [PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2) Huang Rui
@ 2020-02-27 11:47 ` Christian König
  2020-02-27 12:06   ` Huang Rui
  2020-02-27 22:10   ` Luben Tuikov
  0 siblings, 2 replies; 5+ messages in thread
From: Christian König @ 2020-02-27 11:47 UTC (permalink / raw)
  To: Huang Rui, amd-gfx
  Cc: Alex Deucher, Pierre-Eric Pelloux-Prayer, Luben Tuikov, Aaron Liu

Am 27.02.20 um 12:38 schrieb Huang Rui:
> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
> This causes hangs on some hardware (eg: Raven1). This patch restores the
> unconditionnal frame control packets issuing, that's to keep the per-IB logic
> regarding the secure flag.
>
> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
>
> v1 -> v2:
> As suggested by Luben, and accept part of implemetation from this patch:
> - Put "secure" closed to the loop and use optimization
> - Change "secure" to bool again, and move "secure == -1" out of loop.
>
> Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 29 +++++++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++++-------
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++++------
>   4 files changed, 33 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d..0f4909a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		amdgpu_ring_emit_cntxcntl(ring, status);
>   	}
>   
> +	/* Setup initial TMZiness and send it off.
> +	 */
>   	secure = false;
> +	if (job && ring->funcs->emit_frame_cntl) {

Does anybody remember why we check the job here in the first place?

Independent of that I think that the check if 
ring->funcs->emit_frame_cntl should be moved into the 
amdgpu_ring_emit_frame_cntl() function so that we don't need to repeat 
that over and over again.

If amdgpu_ring_emit_frame_cntl() is still a macro then that is probably 
also a good opportunity to change that.

> +		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;
> +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
> +	}
> +
>   	for (i = 0; i < num_ibs; ++i) {
>   		ib = &ibs[i];
>   
> @@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>   			continue;
>   
> -		/* If this IB is TMZ, add frame TMZ start packet,
> -		 * else, turn off TMZ.
> -		 */
> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> -			if (!secure) {
> -				secure = true;
> -				amdgpu_ring_emit_tmz(ring, true);
> +		if (job && ring->funcs->emit_frame_cntl) {
> +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {

Maybe write this as (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)). 
That is probably easier to understand.

Regards,
Christian.

> +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
> +				secure = !secure;
> +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
>   			}
> -		} else if (secure) {
> -			secure = false;
> -			amdgpu_ring_emit_tmz(ring, false);
>   		}
>   
>   		amdgpu_ring_emit_ib(ring, job, ib, status);
>   		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>   	}
>   
> -	if (secure) {
> -		secure = false;
> -		amdgpu_ring_emit_tmz(ring, false);
> -	}
> +	if (job && ring->funcs->emit_frame_cntl)
> +		amdgpu_ring_emit_frame_cntl(ring, false,
> +					    secure ? true : false);
>   
>   #ifdef CONFIG_X86_64
>   	if (!(adev->flags & AMD_IS_APU))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 24caff0..4d019d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>   	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>   					uint32_t reg0, uint32_t reg1,
>   					uint32_t ref, uint32_t mask);
> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
> +				bool secure);
>   	/* priority functions */
>   	void (*set_priority) (struct amdgpu_ring *ring,
>   			      enum drm_sched_priority priority);
> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>   #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>   #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>   #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>   #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 7b61583..748ac35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>   static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>   static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>   static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>   
>   static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>   {
> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>   					   sizeof(de_payload) >> 2);
>   }
>   
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				    bool secure)
>   {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>   }
>   
>   static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>   	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>   	.preempt_ib = gfx_v10_0_ring_preempt_ib,
> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>   	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1c7a16b..fbde712 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>   	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>   }
>   
> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				   bool secure)
>   {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>   }
>   
>   static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>   	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>   	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>   	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>   	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>   	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2)
  2020-02-27 11:47 ` Christian König
@ 2020-02-27 12:06   ` Huang Rui
  2020-02-27 22:10   ` Luben Tuikov
  1 sibling, 0 replies; 5+ messages in thread
From: Huang Rui @ 2020-02-27 12:06 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Deucher, Alexander, Pelloux-prayer, Pierre-eric, Tuikov, Luben,
	Liu, Aaron, amd-gfx

On Thu, Feb 27, 2020 at 07:47:17PM +0800, Koenig, Christian wrote:
> Am 27.02.20 um 12:38 schrieb Huang Rui:
> > Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
> > This causes hangs on some hardware (eg: Raven1). This patch restores the
> > unconditionnal frame control packets issuing, that's to keep the per-IB logic
> > regarding the secure flag.
> >
> > Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
> >
> > v1 -> v2:
> > As suggested by Luben, and accept part of implemetation from this patch:
> > - Put "secure" closed to the loop and use optimization
> > - Change "secure" to bool again, and move "secure == -1" out of loop.
> >
> > Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> > Signed-off-by: Huang Rui <ray.huang@amd.com>
> > Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 29 +++++++++++++++--------------
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++++-------
> >   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++++------
> >   4 files changed, 33 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index 4b2342d..0f4909a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> >   		amdgpu_ring_emit_cntxcntl(ring, status);
> >   	}
> >   
> > +	/* Setup initial TMZiness and send it off.
> > +	 */
> >   	secure = false;
> > +	if (job && ring->funcs->emit_frame_cntl) {
> 
> Does anybody remember why we check the job here in the first place?

Align with previous logic, we only issue this packet while it's user mode
submission. Looks ib tests needn't this.

> 
> Independent of that I think that the check if 
> ring->funcs->emit_frame_cntl should be moved into the 
> amdgpu_ring_emit_frame_cntl() function so that we don't need to repeat 
> that over and over again.
> 
> If amdgpu_ring_emit_frame_cntl() is still a macro then that is probably 
> also a good opportunity to change that.

Agree, this looks better.

> 
> > +		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;
> > +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
> > +	}
> > +
> >   	for (i = 0; i < num_ibs; ++i) {
> >   		ib = &ibs[i];
> >   
> > @@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> >   		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> >   			continue;
> >   
> > -		/* If this IB is TMZ, add frame TMZ start packet,
> > -		 * else, turn off TMZ.
> > -		 */
> > -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> > -			if (!secure) {
> > -				secure = true;
> > -				amdgpu_ring_emit_tmz(ring, true);
> > +		if (job && ring->funcs->emit_frame_cntl) {
> > +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> 
> Maybe write this as (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)). 
> That is probably easier to understand.

Actually, I spend quit a few minutes to understand previous checking. :-)
I am fine to change if Lunben has no objection.

Thanks,
Ray

> 
> Regards,
> Christian.
> 
> > +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
> > +				secure = !secure;
> > +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
> >   			}
> > -		} else if (secure) {
> > -			secure = false;
> > -			amdgpu_ring_emit_tmz(ring, false);
> >   		}
> >   
> >   		amdgpu_ring_emit_ib(ring, job, ib, status);
> >   		status &= ~AMDGPU_HAVE_CTX_SWITCH;
> >   	}
> >   
> > -	if (secure) {
> > -		secure = false;
> > -		amdgpu_ring_emit_tmz(ring, false);
> > -	}
> > +	if (job && ring->funcs->emit_frame_cntl)
> > +		amdgpu_ring_emit_frame_cntl(ring, false,
> > +					    secure ? true : false);
> >   
> >   #ifdef CONFIG_X86_64
> >   	if (!(adev->flags & AMD_IS_APU))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > index 24caff0..4d019d6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
> >   	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
> >   					uint32_t reg0, uint32_t reg1,
> >   					uint32_t ref, uint32_t mask);
> > -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> > +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
> > +				bool secure);
> >   	/* priority functions */
> >   	void (*set_priority) (struct amdgpu_ring *ring,
> >   			      enum drm_sched_priority priority);
> > @@ -247,7 +248,7 @@ struct amdgpu_ring {
> >   #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
> >   #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
> >   #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> > -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> > +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
> >   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> >   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> >   #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 7b61583..748ac35 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
> >   static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
> >   static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
> >   static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
> > -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
> > +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
> >   
> >   static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
> >   {
> > @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
> >   					   sizeof(de_payload) >> 2);
> >   }
> >   
> > -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> > +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> > +				    bool secure)
> >   {
> > -	if (amdgpu_is_tmz(ring->adev)) {
> > -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> > -	}
> > +	uint32_t v = secure ? FRAME_TMZ : 0;
> > +
> > +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
> >   }
> >   
> >   static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
> > @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
> >   	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
> >   	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
> >   	.preempt_ib = gfx_v10_0_ring_preempt_ib,
> > -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
> > +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
> >   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
> >   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> >   	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 1c7a16b..fbde712 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
> >   	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
> >   }
> >   
> > -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> > +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> > +				   bool secure)
> >   {
> > -	if (amdgpu_is_tmz(ring->adev)) {
> > -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> > -	}
> > +	uint32_t v = secure ? FRAME_TMZ : 0;
> > +
> > +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> > +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
> >   }
> >   
> >   static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> > @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
> >   	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> >   	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> >   	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> > -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
> > +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
> >   	.emit_wreg = gfx_v9_0_ring_emit_wreg,
> >   	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> >   	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2)
  2020-02-27 11:47 ` Christian König
  2020-02-27 12:06   ` Huang Rui
@ 2020-02-27 22:10   ` Luben Tuikov
  2020-02-28  0:40     ` Luben Tuikov
  1 sibling, 1 reply; 5+ messages in thread
From: Luben Tuikov @ 2020-02-27 22:10 UTC (permalink / raw)
  To: Christian König, Huang Rui, amd-gfx
  Cc: Alex Deucher, Pierre-Eric Pelloux-Prayer, Tuikov, Luben, Aaron Liu

On 2020-02-27 6:47 a.m., Christian König wrote:
> Am 27.02.20 um 12:38 schrieb Huang Rui:
>> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
>> This causes hangs on some hardware (eg: Raven1). This patch restores the
>> unconditionnal frame control packets issuing, that's to keep the per-IB logic
>> regarding the secure flag.
>>
>> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
>>
>> v1 -> v2:
>> As suggested by Luben, and accept part of implemetation from this patch:
>> - Put "secure" closed to the loop and use optimization
>> - Change "secure" to bool again, and move "secure == -1" out of loop.
>>
>> Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 29 +++++++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++++-------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++++------
>>   4 files changed, 33 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 4b2342d..0f4909a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>   		amdgpu_ring_emit_cntxcntl(ring, status);
>>   	}
>>   
>> +	/* Setup initial TMZiness and send it off.
>> +	 */
>>   	secure = false;
>> +	if (job && ring->funcs->emit_frame_cntl) {
> 
> Does anybody remember why we check the job here in the first place?

Ring tests do not use a job.

Having said this, I would've written a separate
submit for tests and a different one for jobs.

> 
> Independent of that I think that the check if 
> ring->funcs->emit_frame_cntl should be moved into the 
> amdgpu_ring_emit_frame_cntl() function so that we don't need to repeat 
> that over and over again.

What if that function is called from a loop? Since it would
be a static conditional it could be optimized away early on.
It's a classic optimization pattern.

> 
> If amdgpu_ring_emit_frame_cntl() is still a macro then that is probably 
> also a good opportunity to change that.
> 
>> +		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;
>> +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
>> +	}
>> +
>>   	for (i = 0; i < num_ibs; ++i) {
>>   		ib = &ibs[i];
>>   
>> @@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>   		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>>   			continue;
>>   
>> -		/* If this IB is TMZ, add frame TMZ start packet,
>> -		 * else, turn off TMZ.
>> -		 */
>> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
>> -			if (!secure) {
>> -				secure = true;
>> -				amdgpu_ring_emit_tmz(ring, true);
>> +		if (job && ring->funcs->emit_frame_cntl) {
>> +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> 
> Maybe write this as (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)). 
> That is probably easier to understand.

Thanks! This is indeed better!

I'll resubmit with your suggestions and another good one from Tim.

Regards,
Luben


> 
> Regards,
> Christian.
> 
>> +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
>> +				secure = !secure;
>> +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>   			}
>> -		} else if (secure) {
>> -			secure = false;
>> -			amdgpu_ring_emit_tmz(ring, false);
>>   		}
>>   
>>   		amdgpu_ring_emit_ib(ring, job, ib, status);
>>   		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>>   	}
>>   
>> -	if (secure) {
>> -		secure = false;
>> -		amdgpu_ring_emit_tmz(ring, false);
>> -	}
>> +	if (job && ring->funcs->emit_frame_cntl)
>> +		amdgpu_ring_emit_frame_cntl(ring, false,
>> +					    secure ? true : false);
>>   
>>   #ifdef CONFIG_X86_64
>>   	if (!(adev->flags & AMD_IS_APU))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index 24caff0..4d019d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>>   	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>>   					uint32_t reg0, uint32_t reg1,
>>   					uint32_t ref, uint32_t mask);
>> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
>> +				bool secure);
>>   	/* priority functions */
>>   	void (*set_priority) (struct amdgpu_ring *ring,
>>   			      enum drm_sched_priority priority);
>> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>>   #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>>   #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>   #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>>   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>   #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index 7b61583..748ac35 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>>   static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>>   static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>>   static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>>   
>>   static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>>   {
>> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>>   					   sizeof(de_payload) >> 2);
>>   }
>>   
>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>> +				    bool secure)
>>   {
>> -	if (amdgpu_is_tmz(ring->adev)) {
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>> -	}
>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>> +
>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>   }
>>   
>>   static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>>   	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>>   	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>>   	.preempt_ib = gfx_v10_0_ring_preempt_ib,
>> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>>   	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 1c7a16b..fbde712 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>>   	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>>   }
>>   
>> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>> +				   bool secure)
>>   {
>> -	if (amdgpu_is_tmz(ring->adev)) {
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>> -	}
>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>> +
>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>   }
>>   
>>   static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
>> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>   	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>>   	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>>   	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
>> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
>> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>>   	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>>   	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>>   	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2)
  2020-02-27 22:10   ` Luben Tuikov
@ 2020-02-28  0:40     ` Luben Tuikov
  0 siblings, 0 replies; 5+ messages in thread
From: Luben Tuikov @ 2020-02-28  0:40 UTC (permalink / raw)
  To: Christian König, Huang Rui, amd-gfx
  Cc: Alex Deucher, Pierre-Eric Pelloux-Prayer, Aaron Liu

On 2020-02-27 5:10 p.m., Luben Tuikov wrote:
> On 2020-02-27 6:47 a.m., Christian König wrote:
>> Am 27.02.20 um 12:38 schrieb Huang Rui:
>>> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
>>> This causes hangs on some hardware (eg: Raven1). This patch restores the
>>> unconditionnal frame control packets issuing, that's to keep the per-IB logic
>>> regarding the secure flag.
>>>
>>> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
>>>
>>> v1 -> v2:
>>> As suggested by Luben, and accept part of implemetation from this patch:
>>> - Put "secure" closed to the loop and use optimization
>>> - Change "secure" to bool again, and move "secure == -1" out of loop.
>>>
>>> Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 29 +++++++++++++++--------------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++++-------
>>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++++------
>>>   4 files changed, 33 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> index 4b2342d..0f4909a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> @@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>>   		amdgpu_ring_emit_cntxcntl(ring, status);
>>>   	}
>>>   
>>> +	/* Setup initial TMZiness and send it off.
>>> +	 */
>>>   	secure = false;
>>> +	if (job && ring->funcs->emit_frame_cntl) {
>>
>> Does anybody remember why we check the job here in the first place?
> 
> Ring tests do not use a job.
> 
> Having said this, I would've written a separate
> submit for tests and a different one for jobs.
> 
>>
>> Independent of that I think that the check if 
>> ring->funcs->emit_frame_cntl should be moved into the 
>> amdgpu_ring_emit_frame_cntl() function so that we don't need to repeat 
>> that over and over again.
> 
> What if that function is called from a loop? Since it would
> be a static conditional it could be optimized away early on.
> It's a classic optimization pattern.
> 
>>
>> If amdgpu_ring_emit_frame_cntl() is still a macro then that is probably 
>> also a good opportunity to change that.
>>
>>> +		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;

Tim says no need for the ternary here.

>>> +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>> +	}
>>> +
>>>   	for (i = 0; i < num_ibs; ++i) {
>>>   		ib = &ibs[i];
>>>   
>>> @@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>>   		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>>>   			continue;
>>>   
>>> -		/* If this IB is TMZ, add frame TMZ start packet,
>>> -		 * else, turn off TMZ.
>>> -		 */
>>> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
>>> -			if (!secure) {
>>> -				secure = true;
>>> -				amdgpu_ring_emit_tmz(ring, true);
>>> +		if (job && ring->funcs->emit_frame_cntl) {
>>> +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
>>
>> Maybe write this as (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)). 
>> That is probably easier to understand.
> 
> Thanks! This is indeed better!
> 
> I'll resubmit with your suggestions and another good one from Tim.

Sorry, I thought this was a comment on my patch as the expression was taken
right out of it.

>>
>>> +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
>>> +				secure = !secure;
>>> +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>>   			}
>>> -		} else if (secure) {
>>> -			secure = false;
>>> -			amdgpu_ring_emit_tmz(ring, false);
>>>   		}
>>>   
>>>   		amdgpu_ring_emit_ib(ring, job, ib, status);
>>>   		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>>>   	}
>>>   
>>> -	if (secure) {
>>> -		secure = false;
>>> -		amdgpu_ring_emit_tmz(ring, false);
>>> -	}
>>> +	if (job && ring->funcs->emit_frame_cntl)
>>> +		amdgpu_ring_emit_frame_cntl(ring, false,
>>> +					    secure ? true : false);

"secure" is already a boolean.

Regards,
Luben


>>>   
>>>   #ifdef CONFIG_X86_64
>>>   	if (!(adev->flags & AMD_IS_APU))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index 24caff0..4d019d6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>>>   	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>>>   					uint32_t reg0, uint32_t reg1,
>>>   					uint32_t ref, uint32_t mask);
>>> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>>> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
>>> +				bool secure);
>>>   	/* priority functions */
>>>   	void (*set_priority) (struct amdgpu_ring *ring,
>>>   			      enum drm_sched_priority priority);
>>> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>>>   #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>>>   #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>>   #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>>> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>>> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>>>   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>>   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>>   #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> index 7b61583..748ac35 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>>>   static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>>>   static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>>>   static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
>>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
>>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>>>   
>>>   static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>>>   {
>>> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>>>   					   sizeof(de_payload) >> 2);
>>>   }
>>>   
>>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>>> +				    bool secure)
>>>   {
>>> -	if (amdgpu_is_tmz(ring->adev)) {
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>>> -	}
>>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>>> +
>>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>>   }
>>>   
>>>   static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>>> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>>>   	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>>>   	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>>>   	.preempt_ib = gfx_v10_0_ring_preempt_ib,
>>> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>>> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>>>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>>>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>>>   	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index 1c7a16b..fbde712 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>>>   	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>>>   }
>>>   
>>> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>>> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>>> +				   bool secure)
>>>   {
>>> -	if (amdgpu_is_tmz(ring->adev)) {
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>>> -	}
>>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>>> +
>>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>>   }
>>>   
>>>   static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
>>> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>>   	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>>>   	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>>>   	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
>>> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
>>> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>>>   	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>>>   	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>>>   	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
>>
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-02-28  0:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-27 11:38 [PATCH v2] drm/amdgpu: fix the gfx hang while use per-ib secure flag (v2) Huang Rui
2020-02-27 11:47 ` Christian König
2020-02-27 12:06   ` Huang Rui
2020-02-27 22:10   ` Luben Tuikov
2020-02-28  0:40     ` Luben Tuikov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.