All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag
@ 2020-02-25 13:57 Huang Rui
  2020-02-25 14:10 ` Pierre-Eric Pelloux-Prayer
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Huang Rui @ 2020-02-25 13:57 UTC (permalink / raw)
  To: amd-gfx
  Cc: Pierre-Eric Pelloux-Prayer, Huang Rui, Aaron Liu, Luben Tuikov,
	Alex Deucher, Christian König

Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
This causes hangs on some hardware (eg: Raven1). This patch restores the
unconditionnal frame control packets issuing, that's to keep the per-IB logic
regarding the secure flag.

Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)

Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 +++++++++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++-----
 4 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4b2342d..9713a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
 	unsigned fence_flags = 0;
-	bool secure;
+	int secure = -1;
 
 	unsigned i;
 	int r = 0;
@@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 
-	secure = false;
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
@@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
 			continue;
 
-		/* If this IB is TMZ, add frame TMZ start packet,
-		 * else, turn off TMZ.
-		 */
-		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
-			if (!secure) {
-				secure = true;
-				amdgpu_ring_emit_tmz(ring, true);
+		if (job && ring->funcs->emit_frame_cntl) {
+			if (secure == -1) {
+				if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
+					secure = 1;
+					amdgpu_ring_emit_frame_cntl(ring, true, true);
+				} else {
+					secure = 0;
+					amdgpu_ring_emit_frame_cntl(ring, true, false);
+				}
+			} else {
+				if (secure == 1 &&
+				    !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+					secure = 0;
+					amdgpu_ring_emit_frame_cntl(ring, false, true);
+					amdgpu_ring_emit_frame_cntl(ring, true, false);
+				} else if (secure == 0 &&
+					   ib->flags & AMDGPU_IB_FLAGS_SECURE) {
+					secure = 1;
+					amdgpu_ring_emit_frame_cntl(ring, false, false);
+					amdgpu_ring_emit_frame_cntl(ring, true, true);
+				}
 			}
-		} else if (secure) {
-			secure = false;
-			amdgpu_ring_emit_tmz(ring, false);
 		}
 
 		amdgpu_ring_emit_ib(ring, job, ib, status);
 		status &= ~AMDGPU_HAVE_CTX_SWITCH;
 	}
 
-	if (secure) {
-		secure = false;
-		amdgpu_ring_emit_tmz(ring, false);
-	}
+	if (job && ring->funcs->emit_frame_cntl)
+		amdgpu_ring_emit_frame_cntl(ring, false,
+					    (secure == 1) ? true : false);
 
 #ifdef CONFIG_X86_64
 	if (!(adev->flags & AMD_IS_APU))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 24caff0..4d019d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
 					uint32_t reg0, uint32_t reg1,
 					uint32_t ref, uint32_t mask);
-	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+				bool secure);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
 			      enum drm_sched_priority priority);
@@ -247,7 +248,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7b61583..748ac35 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 					   sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				    bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
@@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v10_0_ring_preempt_ib,
-	.emit_tmz = gfx_v10_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1c7a16b..fbde712 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
 }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				   bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-	.emit_tmz = gfx_v9_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag
  2020-02-25 13:57 [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag Huang Rui
@ 2020-02-25 14:10 ` Pierre-Eric Pelloux-Prayer
  2020-02-25 14:10 ` Alex Deucher
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2020-02-25 14:10 UTC (permalink / raw)
  To: Huang Rui, amd-gfx
  Cc: Alex Deucher, Luben Tuikov, Christian König, Aaron Liu

Hi Ray,

I confirm that this fixes the hang I had when running libdrm's amdgpu_test.
Thanks!

Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>


On 25/02/2020 14:57, Huang Rui wrote:
> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
> This causes hangs on some hardware (eg: Raven1). This patch restores the
> unconditionnal frame control packets issuing, that's to keep the per-IB logic
> regarding the secure flag.
> 
> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
> 
> Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 +++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++-----
>  4 files changed, 43 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d..9713a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  	uint64_t fence_ctx;
>  	uint32_t status = 0, alloc_size;
>  	unsigned fence_flags = 0;
> -	bool secure;
> +	int secure = -1;
>  
>  	unsigned i;
>  	int r = 0;
> @@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		amdgpu_ring_emit_cntxcntl(ring, status);
>  	}
>  
> -	secure = false;
>  	for (i = 0; i < num_ibs; ++i) {
>  		ib = &ibs[i];
>  
> @@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>  			continue;
>  
> -		/* If this IB is TMZ, add frame TMZ start packet,
> -		 * else, turn off TMZ.
> -		 */
> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> -			if (!secure) {
> -				secure = true;
> -				amdgpu_ring_emit_tmz(ring, true);
> +		if (job && ring->funcs->emit_frame_cntl) {
> +			if (secure == -1) {
> +				if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +					secure = 1;
> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
> +				} else {
> +					secure = 0;
> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
> +				}
> +			} else {
> +				if (secure == 1 &&
> +				    !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> +					secure = 0;
> +					amdgpu_ring_emit_frame_cntl(ring, false, true);
> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
> +				} else if (secure == 0 &&
> +					   ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +					secure = 1;
> +					amdgpu_ring_emit_frame_cntl(ring, false, false);
> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
> +				}
>  			}
> -		} else if (secure) {
> -			secure = false;
> -			amdgpu_ring_emit_tmz(ring, false);
>  		}
>  
>  		amdgpu_ring_emit_ib(ring, job, ib, status);
>  		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>  	}
>  
> -	if (secure) {
> -		secure = false;
> -		amdgpu_ring_emit_tmz(ring, false);
> -	}
> +	if (job && ring->funcs->emit_frame_cntl)
> +		amdgpu_ring_emit_frame_cntl(ring, false,
> +					    (secure == 1) ? true : false);
>  
>  #ifdef CONFIG_X86_64
>  	if (!(adev->flags & AMD_IS_APU))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 24caff0..4d019d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>  	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>  					uint32_t reg0, uint32_t reg1,
>  					uint32_t ref, uint32_t mask);
> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
> +				bool secure);
>  	/* priority functions */
>  	void (*set_priority) (struct amdgpu_ring *ring,
>  			      enum drm_sched_priority priority);
> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>  #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 7b61583..748ac35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>  static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>  static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>  static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>  
>  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>  {
> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>  					   sizeof(de_payload) >> 2);
>  }
>  
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				    bool secure)
>  {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>  
>  static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>  	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>  	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>  	.preempt_ib = gfx_v10_0_ring_preempt_ib,
> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>  	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>  	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1c7a16b..fbde712 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>  	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>  }
>  
> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				   bool secure)
>  {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>  
>  static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>  	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>  	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>  	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>  	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>  	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag
  2020-02-25 13:57 [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag Huang Rui
  2020-02-25 14:10 ` Pierre-Eric Pelloux-Prayer
@ 2020-02-25 14:10 ` Alex Deucher
  2020-02-25 14:30 ` Liu, Aaron
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Alex Deucher @ 2020-02-25 14:10 UTC (permalink / raw)
  To: Huang Rui
  Cc: Pierre-Eric Pelloux-Prayer, Aaron Liu, amd-gfx list,
	Luben Tuikov, Alex Deucher, Christian König

On Tue, Feb 25, 2020 at 8:58 AM Huang Rui <ray.huang@amd.com> wrote:
>
> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
> This causes hangs on some hardware (eg: Raven1). This patch restores the
> unconditionnal frame control packets issuing, that's to keep the per-IB logic
> regarding the secure flag.
>
> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
>
> Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> Signed-off-by: Huang Rui <ray.huang@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 +++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++-----
>  4 files changed, 43 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d..9713a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>         uint64_t fence_ctx;
>         uint32_t status = 0, alloc_size;
>         unsigned fence_flags = 0;
> -       bool secure;
> +       int secure = -1;
>
>         unsigned i;
>         int r = 0;
> @@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                 amdgpu_ring_emit_cntxcntl(ring, status);
>         }
>
> -       secure = false;
>         for (i = 0; i < num_ibs; ++i) {
>                 ib = &ibs[i];
>
> @@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                     !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>                         continue;
>
> -               /* If this IB is TMZ, add frame TMZ start packet,
> -                * else, turn off TMZ.
> -                */
> -               if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> -                       if (!secure) {
> -                               secure = true;
> -                               amdgpu_ring_emit_tmz(ring, true);
> +               if (job && ring->funcs->emit_frame_cntl) {
> +                       if (secure == -1) {
> +                               if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +                                       secure = 1;
> +                                       amdgpu_ring_emit_frame_cntl(ring, true, true);
> +                               } else {
> +                                       secure = 0;
> +                                       amdgpu_ring_emit_frame_cntl(ring, true, false);
> +                               }
> +                       } else {
> +                               if (secure == 1 &&
> +                                   !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> +                                       secure = 0;
> +                                       amdgpu_ring_emit_frame_cntl(ring, false, true);
> +                                       amdgpu_ring_emit_frame_cntl(ring, true, false);
> +                               } else if (secure == 0 &&
> +                                          ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +                                       secure = 1;
> +                                       amdgpu_ring_emit_frame_cntl(ring, false, false);
> +                                       amdgpu_ring_emit_frame_cntl(ring, true, true);
> +                               }
>                         }
> -               } else if (secure) {
> -                       secure = false;
> -                       amdgpu_ring_emit_tmz(ring, false);
>                 }
>
>                 amdgpu_ring_emit_ib(ring, job, ib, status);
>                 status &= ~AMDGPU_HAVE_CTX_SWITCH;
>         }
>
> -       if (secure) {
> -               secure = false;
> -               amdgpu_ring_emit_tmz(ring, false);
> -       }
> +       if (job && ring->funcs->emit_frame_cntl)
> +               amdgpu_ring_emit_frame_cntl(ring, false,
> +                                           (secure == 1) ? true : false);
>
>  #ifdef CONFIG_X86_64
>         if (!(adev->flags & AMD_IS_APU))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 24caff0..4d019d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>         void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>                                         uint32_t reg0, uint32_t reg1,
>                                         uint32_t ref, uint32_t mask);
> -       void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> +       void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
> +                               bool secure);
>         /* priority functions */
>         void (*set_priority) (struct amdgpu_ring *ring,
>                               enum drm_sched_priority priority);
> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>  #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 7b61583..748ac35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>  static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>  static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>  static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>
>  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>  {
> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>                                            sizeof(de_payload) >> 2);
>  }
>
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +                                   bool secure)
>  {
> -       if (amdgpu_is_tmz(ring->adev)) {
> -               amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -               amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -       }
> +       uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +       amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>
>  static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>         .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>         .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>         .preempt_ib = gfx_v10_0_ring_preempt_ib,
> -       .emit_tmz = gfx_v10_0_ring_emit_tmz,
> +       .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>         .emit_wreg = gfx_v10_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1c7a16b..fbde712 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>  }
>
> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +                                  bool secure)
>  {
> -       if (amdgpu_is_tmz(ring->adev)) {
> -               amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -               amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -       }
> +       uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +       amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +       amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>
>  static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> -       .emit_tmz = gfx_v9_0_ring_emit_tmz,
> +       .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>         .emit_wreg = gfx_v9_0_ring_emit_wreg,
>         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag
  2020-02-25 13:57 [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag Huang Rui
  2020-02-25 14:10 ` Pierre-Eric Pelloux-Prayer
  2020-02-25 14:10 ` Alex Deucher
@ 2020-02-25 14:30 ` Liu, Aaron
  2020-02-25 22:56 ` Luben Tuikov
  2020-02-25 23:40 ` Luben Tuikov
  4 siblings, 0 replies; 12+ messages in thread
From: Liu, Aaron @ 2020-02-25 14:30 UTC (permalink / raw)
  To: Huang, Ray, amd-gfx
  Cc: Deucher, Alexander, Pelloux-prayer, Pierre-eric, Tuikov, Luben,
	Koenig, Christian

[AMD Official Use Only - Internal Distribution Only]

Reviewed&Tested-by: Aaron Liu <aaron.liu@amd.com>

-----Original Message-----
From: Huang, Ray <Ray.Huang@amd.com> 
Sent: Tuesday, February 25, 2020 9:58 PM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Liu, Aaron <Aaron.Liu@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; Pelloux-prayer, Pierre-eric <Pierre-eric.Pelloux-prayer@amd.com>; Huang, Ray <Ray.Huang@amd.com>
Subject: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag

Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
This causes hangs on some hardware (eg: Raven1). This patch restores the unconditionnal frame control packets issuing, that's to keep the per-IB logic regarding the secure flag.

Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)

Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 +++++++++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++-----
 4 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4b2342d..9713a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
 	unsigned fence_flags = 0;
-	bool secure;
+	int secure = -1;
 
 	unsigned i;
 	int r = 0;
@@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 
-	secure = false;
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
@@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
 			continue;
 
-		/* If this IB is TMZ, add frame TMZ start packet,
-		 * else, turn off TMZ.
-		 */
-		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
-			if (!secure) {
-				secure = true;
-				amdgpu_ring_emit_tmz(ring, true);
+		if (job && ring->funcs->emit_frame_cntl) {
+			if (secure == -1) {
+				if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
+					secure = 1;
+					amdgpu_ring_emit_frame_cntl(ring, true, true);
+				} else {
+					secure = 0;
+					amdgpu_ring_emit_frame_cntl(ring, true, false);
+				}
+			} else {
+				if (secure == 1 &&
+				    !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+					secure = 0;
+					amdgpu_ring_emit_frame_cntl(ring, false, true);
+					amdgpu_ring_emit_frame_cntl(ring, true, false);
+				} else if (secure == 0 &&
+					   ib->flags & AMDGPU_IB_FLAGS_SECURE) {
+					secure = 1;
+					amdgpu_ring_emit_frame_cntl(ring, false, false);
+					amdgpu_ring_emit_frame_cntl(ring, true, true);
+				}
 			}
-		} else if (secure) {
-			secure = false;
-			amdgpu_ring_emit_tmz(ring, false);
 		}
 
 		amdgpu_ring_emit_ib(ring, job, ib, status);
 		status &= ~AMDGPU_HAVE_CTX_SWITCH;
 	}
 
-	if (secure) {
-		secure = false;
-		amdgpu_ring_emit_tmz(ring, false);
-	}
+	if (job && ring->funcs->emit_frame_cntl)
+		amdgpu_ring_emit_frame_cntl(ring, false,
+					    (secure == 1) ? true : false);
 
 #ifdef CONFIG_X86_64
 	if (!(adev->flags & AMD_IS_APU))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 24caff0..4d019d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
 					uint32_t reg0, uint32_t reg1,
 					uint32_t ref, uint32_t mask);
-	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+				bool secure);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
 			      enum drm_sched_priority priority); @@ -247,7 +248,7 @@ struct amdgpu_ring {  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))  #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) 
+(r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7b61583..748ac35 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);  static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);  static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);  static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, 
+bool start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)  { @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 					   sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				    bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v10_0_ring_preempt_ib,
-	.emit_tmz = gfx_v10_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1c7a16b..fbde712 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);  }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				   bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-	.emit_tmz = gfx_v9_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag
  2020-02-25 13:57 [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag Huang Rui
                   ` (2 preceding siblings ...)
  2020-02-25 14:30 ` Liu, Aaron
@ 2020-02-25 22:56 ` Luben Tuikov
  2020-02-25 23:40 ` Luben Tuikov
  4 siblings, 0 replies; 12+ messages in thread
From: Luben Tuikov @ 2020-02-25 22:56 UTC (permalink / raw)
  To: Huang Rui, amd-gfx
  Cc: Alex Deucher, Pierre-Eric Pelloux-Prayer, Christian König,
	Aaron Liu

Thanks for fixing this Ray.

I think we can simplify the logic introduced in this patch.

Regards,
Luben

On 2020-02-25 8:57 a.m., Huang Rui wrote:
> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
> This causes hangs on some hardware (eg: Raven1). This patch restores the
> unconditionnal frame control packets issuing, that's to keep the per-IB logic
> regarding the secure flag.
> 
> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
> 
> Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 +++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++-----
>  4 files changed, 43 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d..9713a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  	uint64_t fence_ctx;
>  	uint32_t status = 0, alloc_size;
>  	unsigned fence_flags = 0;
> -	bool secure;
> +	int secure = -1;
>  
>  	unsigned i;
>  	int r = 0;
> @@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		amdgpu_ring_emit_cntxcntl(ring, status);
>  	}
>  
> -	secure = false;
>  	for (i = 0; i < num_ibs; ++i) {
>  		ib = &ibs[i];
>  
> @@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>  			continue;
>  
> -		/* If this IB is TMZ, add frame TMZ start packet,
> -		 * else, turn off TMZ.
> -		 */
> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> -			if (!secure) {
> -				secure = true;
> -				amdgpu_ring_emit_tmz(ring, true);
> +		if (job && ring->funcs->emit_frame_cntl) {
> +			if (secure == -1) {
> +				if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +					secure = 1;
> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
> +				} else {
> +					secure = 0;
> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
> +				}
> +			} else {
> +				if (secure == 1 &&
> +				    !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> +					secure = 0;
> +					amdgpu_ring_emit_frame_cntl(ring, false, true);
> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
> +				} else if (secure == 0 &&
> +					   ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +					secure = 1;
> +					amdgpu_ring_emit_frame_cntl(ring, false, false);
> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
> +				}
>  			}
> -		} else if (secure) {
> -			secure = false;
> -			amdgpu_ring_emit_tmz(ring, false);
>  		}
>  
>  		amdgpu_ring_emit_ib(ring, job, ib, status);
>  		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>  	}
>  
> -	if (secure) {
> -		secure = false;
> -		amdgpu_ring_emit_tmz(ring, false);
> -	}
> +	if (job && ring->funcs->emit_frame_cntl)
> +		amdgpu_ring_emit_frame_cntl(ring, false,
> +					    (secure == 1) ? true : false);
>  
>  #ifdef CONFIG_X86_64
>  	if (!(adev->flags & AMD_IS_APU))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 24caff0..4d019d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>  	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>  					uint32_t reg0, uint32_t reg1,
>  					uint32_t ref, uint32_t mask);
> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
> +				bool secure);
>  	/* priority functions */
>  	void (*set_priority) (struct amdgpu_ring *ring,
>  			      enum drm_sched_priority priority);
> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>  #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 7b61583..748ac35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>  static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>  static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>  static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>  
>  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>  {
> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>  					   sizeof(de_payload) >> 2);
>  }
>  
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				    bool secure)
>  {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>  
>  static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>  	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>  	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>  	.preempt_ib = gfx_v10_0_ring_preempt_ib,
> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>  	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>  	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1c7a16b..fbde712 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>  	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>  }
>  
> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				   bool secure)
>  {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>  
>  static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>  	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>  	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>  	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>  	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>  	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag
  2020-02-25 13:57 [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag Huang Rui
                   ` (3 preceding siblings ...)
  2020-02-25 22:56 ` Luben Tuikov
@ 2020-02-25 23:40 ` Luben Tuikov
  2020-02-26 16:26   ` Luben Tuikov
  4 siblings, 1 reply; 12+ messages in thread
From: Luben Tuikov @ 2020-02-25 23:40 UTC (permalink / raw)
  To: Huang Rui, amd-gfx
  Cc: Alex Deucher, Pierre-Eric Pelloux-Prayer, Christian König,
	Aaron Liu

On 2020-02-25 8:57 a.m., Huang Rui wrote:
> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
> This causes hangs on some hardware (eg: Raven1). This patch restores the
> unconditionnal frame control packets issuing, that's to keep the per-IB logic
> regarding the secure flag.
> 
> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
> 
> Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 +++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++-----
>  4 files changed, 43 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d..9713a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  	uint64_t fence_ctx;
>  	uint32_t status = 0, alloc_size;
>  	unsigned fence_flags = 0;
> -	bool secure;
> +	int secure = -1;

Don't initialize them far away from where they're being
used. That's a very bad habit.
This function is pretty long.
Initialize "secure" next to where it is being used--right
above the for-loop so that it is clear how it's used.

>  
>  	unsigned i;
>  	int r = 0;
> @@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		amdgpu_ring_emit_cntxcntl(ring, status);
>  	}
>  
> -	secure = false;

Set "secure" here, close to where it is used.
This makes the code modular, so that it is easy
to see what is happening _locally_. Another
advantage is that should ever this code be pulled
out into its own function, we don't have to chase
variables around.

>  	for (i = 0; i < num_ibs; ++i) {
>  		ib = &ibs[i];
>  
> @@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>  			continue;
>  
> -		/* If this IB is TMZ, add frame TMZ start packet,
> -		 * else, turn off TMZ.
> -		 */
> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> -			if (!secure) {
> -				secure = true;
> -				amdgpu_ring_emit_tmz(ring, true);
> +		if (job && ring->funcs->emit_frame_cntl) {
> +			if (secure == -1) {
> +				if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +					secure = 1;
> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
> +				} else {
> +					secure = 0;
> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
> +				}

So the above is executed only once. At the very first
iteration, since "secure" is set to -1. You can pull
that out of the loop and have a more straightforward
body of the loop.

Regards,
Luben

> +			} else {
> +				if (secure == 1 &&
> +				    !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> +					secure = 0;
> +					amdgpu_ring_emit_frame_cntl(ring, false, true);
> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
> +				} else if (secure == 0 &&
> +					   ib->flags & AMDGPU_IB_FLAGS_SECURE) {
> +					secure = 1;
> +					amdgpu_ring_emit_frame_cntl(ring, false, false);
> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
> +				}
>  			}
> -		} else if (secure) {
> -			secure = false;
> -			amdgpu_ring_emit_tmz(ring, false);
>  		}
>  
>  		amdgpu_ring_emit_ib(ring, job, ib, status);
>  		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>  	}
>  
> -	if (secure) {
> -		secure = false;
> -		amdgpu_ring_emit_tmz(ring, false);
> -	}
> +	if (job && ring->funcs->emit_frame_cntl)
> +		amdgpu_ring_emit_frame_cntl(ring, false,
> +					    (secure == 1) ? true : false);
>  
>  #ifdef CONFIG_X86_64
>  	if (!(adev->flags & AMD_IS_APU))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 24caff0..4d019d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>  	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>  					uint32_t reg0, uint32_t reg1,
>  					uint32_t ref, uint32_t mask);
> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
> +				bool secure);
>  	/* priority functions */
>  	void (*set_priority) (struct amdgpu_ring *ring,
>  			      enum drm_sched_priority priority);
> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>  #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 7b61583..748ac35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>  static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>  static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>  static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>  
>  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>  {
> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>  					   sizeof(de_payload) >> 2);
>  }
>  
> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				    bool secure)
>  {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>  
>  static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>  	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>  	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>  	.preempt_ib = gfx_v10_0_ring_preempt_ib,
> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>  	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>  	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1c7a16b..fbde712 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>  	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>  }
>  
> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
> +				   bool secure)
>  {
> -	if (amdgpu_is_tmz(ring->adev)) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
> -	}
> +	uint32_t v = secure ? FRAME_TMZ : 0;
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>  }
>  
>  static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>  	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>  	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>  	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>  	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>  	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag
  2020-02-25 23:40 ` Luben Tuikov
@ 2020-02-26 16:26   ` Luben Tuikov
  2020-02-26 22:39     ` [PATCH 0/1] Fixing the GFX hang Luben Tuikov
  0 siblings, 1 reply; 12+ messages in thread
From: Luben Tuikov @ 2020-02-26 16:26 UTC (permalink / raw)
  To: Huang Rui, amd-gfx
  Cc: Alex Deucher, Pierre-Eric Pelloux-Prayer, Christian König,
	Aaron Liu

On 2020-02-25 18:40, Luben Tuikov wrote:
> On 2020-02-25 8:57 a.m., Huang Rui wrote:
>> Since 6643ba1 frame control packets are only issued in presence of secure IB(s).
>> This causes hangs on some hardware (eg: Raven1). This patch restores the
>> unconditionnal frame control packets issuing, that's to keep the per-IB logic
>> regarding the secure flag.
>>
>> Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ)
>>
>> Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 41 +++++++++++++++++++-------------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 ++--
>>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++------
>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++-----
>>  4 files changed, 43 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 4b2342d..9713a7d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>  	uint64_t fence_ctx;
>>  	uint32_t status = 0, alloc_size;
>>  	unsigned fence_flags = 0;
>> -	bool secure;
>> +	int secure = -1;
> 
> Don't initialize them far away from where they're being
> used. That's a very bad habit.
> This function is pretty long.
> Initialize "secure" next to where it is being used--right
> above the for-loop so that it is clear how it's used.
> 
>>  
>>  	unsigned i;
>>  	int r = 0;
>> @@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>  		amdgpu_ring_emit_cntxcntl(ring, status);
>>  	}
>>  
>> -	secure = false;
> 
> Set "secure" here, close to where it is used.
> This makes the code modular, so that it is easy
> to see what is happening _locally_. Another
> advantage is that should ever this code be pulled
> out into its own function, we don't have to chase
> variables around.
> 
>>  	for (i = 0; i < num_ibs; ++i) {
>>  		ib = &ibs[i];
>>  
>> @@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>  		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>>  			continue;
>>  
>> -		/* If this IB is TMZ, add frame TMZ start packet,
>> -		 * else, turn off TMZ.
>> -		 */
>> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
>> -			if (!secure) {
>> -				secure = true;
>> -				amdgpu_ring_emit_tmz(ring, true);
>> +		if (job && ring->funcs->emit_frame_cntl) {

Also note that the value of this conditional is static to the body of the loop,
i.e. invariant to the loop, and it can be pulled out of the loop.
Each loop iteration doesn't change the value of this conditional. This would 
create two tight loops. This is a formulaic optimization method which generates
slightly longer code, but more streamlined.

I'm ambivalent as to whether you implement those optimizations.

Regards,
Luben

>> +			if (secure == -1) {
>> +				if (ib->flags & AMDGPU_IB_FLAGS_SECURE) {
>> +					secure = 1;
>> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
>> +				} else {
>> +					secure = 0;
>> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
>> +				}
> 
> So the above is executed only once. At the very first
> iteration, since "secure" is set to -1. You can pull
> that out of the loop and have a more straightforward
> body of the loop.
> 
> Regards,
> Luben
> 
>> +			} else {
>> +				if (secure == 1 &&
>> +				    !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
>> +					secure = 0;
>> +					amdgpu_ring_emit_frame_cntl(ring, false, true);
>> +					amdgpu_ring_emit_frame_cntl(ring, true, false);
>> +				} else if (secure == 0 &&
>> +					   ib->flags & AMDGPU_IB_FLAGS_SECURE) {
>> +					secure = 1;
>> +					amdgpu_ring_emit_frame_cntl(ring, false, false);
>> +					amdgpu_ring_emit_frame_cntl(ring, true, true);
>> +				}
>>  			}
>> -		} else if (secure) {
>> -			secure = false;
>> -			amdgpu_ring_emit_tmz(ring, false);
>>  		}
>>  
>>  		amdgpu_ring_emit_ib(ring, job, ib, status);
>>  		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>>  	}
>>  
>> -	if (secure) {
>> -		secure = false;
>> -		amdgpu_ring_emit_tmz(ring, false);
>> -	}
>> +	if (job && ring->funcs->emit_frame_cntl)
>> +		amdgpu_ring_emit_frame_cntl(ring, false,
>> +					    (secure == 1) ? true : false);
>>  
>>  #ifdef CONFIG_X86_64
>>  	if (!(adev->flags & AMD_IS_APU))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index 24caff0..4d019d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>>  	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>>  					uint32_t reg0, uint32_t reg1,
>>  					uint32_t ref, uint32_t mask);
>> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
>> +				bool secure);
>>  	/* priority functions */
>>  	void (*set_priority) (struct amdgpu_ring *ring,
>>  			      enum drm_sched_priority priority);
>> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>>  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>  #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index 7b61583..748ac35 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>>  static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>>  static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>>  static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>>  
>>  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>>  {
>> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>>  					   sizeof(de_payload) >> 2);
>>  }
>>  
>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>> +				    bool secure)
>>  {
>> -	if (amdgpu_is_tmz(ring->adev)) {
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>> -	}
>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>> +
>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>  }
>>  
>>  static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>>  	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>>  	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>>  	.preempt_ib = gfx_v10_0_ring_preempt_ib,
>> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>>  	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>>  	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>>  	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 1c7a16b..fbde712 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>>  	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>>  }
>>  
>> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>> +				   bool secure)
>>  {
>> -	if (amdgpu_is_tmz(ring->adev)) {
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>> -	}
>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>> +
>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>  }
>>  
>>  static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
>> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>  	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>>  	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>>  	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
>> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
>> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>>  	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>>  	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>>  	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
>>
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 0/1] Fixing the GFX hang
  2020-02-26 16:26   ` Luben Tuikov
@ 2020-02-26 22:39     ` Luben Tuikov
  2020-02-26 22:39       ` [PATCH 1/1] drm/amdgpu: Fix per-IB secure flag " Luben Tuikov
  0 siblings, 1 reply; 12+ messages in thread
From: Luben Tuikov @ 2020-02-26 22:39 UTC (permalink / raw)
  To: amd-gfx
  Cc: Pierre-Eric Pelloux-Prayer, Aaron Liu, Huang Rui,
	Alexander Deucher, Christian König, Luben Tuikov

I was thinking like something like this, Ray.
It optimizes the body of the loop, and pulls
out invariant conditionals out of the loop,
and a few other optimizations.

Luben Tuikov (1):
  drm/amdgpu: Fix per-IB secure flag GFX hang

 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 87 +++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 ++--
 4 files changed, 79 insertions(+), 41 deletions(-)

-- 
2.25.1.362.g51ebf55b93

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/1] drm/amdgpu: Fix per-IB secure flag GFX hang
  2020-02-26 22:39     ` [PATCH 0/1] Fixing the GFX hang Luben Tuikov
@ 2020-02-26 22:39       ` Luben Tuikov
  2020-02-27 11:56         ` Huang Rui
  0 siblings, 1 reply; 12+ messages in thread
From: Luben Tuikov @ 2020-02-26 22:39 UTC (permalink / raw)
  To: amd-gfx
  Cc: Pierre-Eric Pelloux-Prayer, Aaron Liu, Huang Rui,
	Alexander Deucher, Christian König, Luben Tuikov

Since commit "Move to a per-IB secure flag (TMZ)",
we've been seeing hangs in GFX. Ray H. pointed out
by sending a patch that we need to send FRAME
CONTROL stop/start back-to-back, every time we
flip the TMZ flag as per each IB we submit. That
is, when we transition from TMZ to non-TMZ we have
to send a stop with TMZ followed by a start with
non-TMZ, and similarly for transitioning from
non-TMZ into TMZ.

This patch implements this, thus fixing the GFX
hang.

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 87 +++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 ++--
 4 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4b2342d11520..16d6df3304d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -216,40 +216,75 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 
-	secure = false;
+	/* Find the first non-preamble IB.
+	 */
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
 		/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
-		    skip_preamble &&
-		    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
-		    !amdgpu_mcbp &&
-		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
-			continue;
-
-		/* If this IB is TMZ, add frame TMZ start packet,
-		 * else, turn off TMZ.
-		 */
-		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
-			if (!secure) {
-				secure = true;
-				amdgpu_ring_emit_tmz(ring, true);
-			}
-		} else if (secure) {
+		if (!(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) ||
+		    !skip_preamble ||
+		    (status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) ||
+		    amdgpu_mcbp ||
+		    amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
+			break;
+	}
+	if (i >= num_ibs)
+		goto Done;
+	/* Setup initial TMZiness and send it off.
+	 */
+	secure = false;
+	if (job && ring->funcs->emit_frame_cntl) {
+		if (ib->flags & AMDGPU_IB_FLAGS_SECURE)
+			secure = true;
+		else
 			secure = false;
-			amdgpu_ring_emit_tmz(ring, false);
-		}
-
-		amdgpu_ring_emit_ib(ring, job, ib, status);
-		status &= ~AMDGPU_HAVE_CTX_SWITCH;
+		amdgpu_ring_emit_frame_cntl(ring, true, secure);
 	}
+	amdgpu_ring_emit_ib(ring, job, ib, status);
+	status &= ~AMDGPU_HAVE_CTX_SWITCH;
+	i += 1;
+	/* Send the rest of the IBs.
+	 */
+	if (job && ring->funcs->emit_frame_cntl) {
+		for ( ; i < num_ibs; ++i) {
+			ib = &ibs[i];
+
+			/* drop preamble IBs if we don't have a context switch */
+			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+			    skip_preamble &&
+			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
+			    !amdgpu_mcbp &&
+			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
+				continue;
+
+			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+				amdgpu_ring_emit_frame_cntl(ring, false, secure);
+				secure = !secure;
+				amdgpu_ring_emit_frame_cntl(ring, true, secure);
+			}
 
-	if (secure) {
-		secure = false;
-		amdgpu_ring_emit_tmz(ring, false);
+			amdgpu_ring_emit_ib(ring, job, ib, status);
+			status &= ~AMDGPU_HAVE_CTX_SWITCH;
+		}
+		amdgpu_ring_emit_frame_cntl(ring, false, secure);
+	} else {
+		for ( ; i < num_ibs; ++i) {
+			ib = &ibs[i];
+
+			/* drop preamble IBs if we don't have a context switch */
+			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+			    skip_preamble &&
+			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
+			    !amdgpu_mcbp &&
+			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
+				continue;
+
+			amdgpu_ring_emit_ib(ring, job, ib, status);
+			status &= ~AMDGPU_HAVE_CTX_SWITCH;
+		}
 	}
-
+Done:
 #ifdef CONFIG_X86_64
 	if (!(adev->flags & AMD_IS_APU))
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 24caff085d00..4d019d6b3eb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
 					uint32_t reg0, uint32_t reg1,
 					uint32_t ref, uint32_t mask);
-	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+				bool secure);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
 			      enum drm_sched_priority priority);
@@ -247,7 +248,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 44f00ecea322..3e83ddb64c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -4724,12 +4724,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 					   sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				    bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
@@ -5183,7 +5184,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v10_0_ring_preempt_ib,
-	.emit_tmz = gfx_v10_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1c7a16b91686..fbde71224127 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
 }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+				   bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-	.emit_tmz = gfx_v9_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
-- 
2.25.1.362.g51ebf55b93

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: Fix per-IB secure flag GFX hang
  2020-02-26 22:39       ` [PATCH 1/1] drm/amdgpu: Fix per-IB secure flag " Luben Tuikov
@ 2020-02-27 11:56         ` Huang Rui
  2020-02-27 21:15           ` Luben Tuikov
  0 siblings, 1 reply; 12+ messages in thread
From: Huang Rui @ 2020-02-27 11:56 UTC (permalink / raw)
  To: Tuikov, Luben
  Cc: Deucher, Alexander, Pelloux-prayer, Pierre-eric, Koenig,
	Christian, amd-gfx, Liu, Aaron

On Thu, Feb 27, 2020 at 06:39:03AM +0800, Tuikov, Luben wrote:
> Since commit "Move to a per-IB secure flag (TMZ)",
> we've been seeing hangs in GFX. Ray H. pointed out
> by sending a patch that we need to send FRAME
> CONTROL stop/start back-to-back, every time we
> flip the TMZ flag as per each IB we submit. That
> is, when we transition from TMZ to non-TMZ we have
> to send a stop with TMZ followed by a start with
> non-TMZ, and similarly for transitioning from
> non-TMZ into TMZ.
> 
> This patch implements this, thus fixing the GFX
> hang.
> 
> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 87 +++++++++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 ++--
>  4 files changed, 79 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4b2342d11520..16d6df3304d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -216,40 +216,75 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>  		amdgpu_ring_emit_cntxcntl(ring, status);
>  	}
>  
> -	secure = false;
> +	/* Find the first non-preamble IB.
> +	 */
>  	for (i = 0; i < num_ibs; ++i) {
>  		ib = &ibs[i];
>  
>  		/* drop preamble IBs if we don't have a context switch */
> -		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> -		    skip_preamble &&
> -		    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> -		    !amdgpu_mcbp &&
> -		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> -			continue;
> -
> -		/* If this IB is TMZ, add frame TMZ start packet,
> -		 * else, turn off TMZ.
> -		 */
> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> -			if (!secure) {
> -				secure = true;
> -				amdgpu_ring_emit_tmz(ring, true);
> -			}
> -		} else if (secure) {
> +		if (!(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) ||
> +		    !skip_preamble ||
> +		    (status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) ||
> +		    amdgpu_mcbp ||
> +		    amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> +			break;
> +	}
> +	if (i >= num_ibs)
> +		goto Done;
> +	/* Setup initial TMZiness and send it off.
> +	 */
> +	secure = false;
> +	if (job && ring->funcs->emit_frame_cntl) {
> +		if (ib->flags & AMDGPU_IB_FLAGS_SECURE)
> +			secure = true;
> +		else
>  			secure = false;
> -			amdgpu_ring_emit_tmz(ring, false);
> -		}
> -
> -		amdgpu_ring_emit_ib(ring, job, ib, status);
> -		status &= ~AMDGPU_HAVE_CTX_SWITCH;
> +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
>  	}
> +	amdgpu_ring_emit_ib(ring, job, ib, status);
> +	status &= ~AMDGPU_HAVE_CTX_SWITCH;
> +	i += 1;
> +	/* Send the rest of the IBs.
> +	 */
> +	if (job && ring->funcs->emit_frame_cntl) {
> +		for ( ; i < num_ibs; ++i) {
> +			ib = &ibs[i];
> +
> +			/* drop preamble IBs if we don't have a context switch */
> +			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> +			    skip_preamble &&
> +			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> +			    !amdgpu_mcbp &&
> +			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> +				continue;

Snip.

> +
> +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
> +				secure = !secure;
> +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
> +			}

That's pretty good optimization! I spend quit a few time to understand this.

>  
> -	if (secure) {
> -		secure = false;
> -		amdgpu_ring_emit_tmz(ring, false);
> +			amdgpu_ring_emit_ib(ring, job, ib, status);
> +			status &= ~AMDGPU_HAVE_CTX_SWITCH;
> +		}
> +		amdgpu_ring_emit_frame_cntl(ring, false, secure);
> +	} else {
> +		for ( ; i < num_ibs; ++i) {
> +			ib = &ibs[i];
> +
> +			/* drop preamble IBs if we don't have a context switch */
> +			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> +			    skip_preamble &&
> +			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> +			    !amdgpu_mcbp &&
> +			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> +				continue;
> +
> +			amdgpu_ring_emit_ib(ring, job, ib, status);
> +			status &= ~AMDGPU_HAVE_CTX_SWITCH;

To pull the checking "job && ring->funcs->emit_frame_cntl" out of the loop,
that make the implemenation more duplicated like below, we have to write
the same codes multiple times. I hope the implementation is more simple and
readable. Please see my V2 patch that I just send out. We can try to use
minimum changes to fix the issue.

		for ( ; i < num_ibs; ++i) {
			ib = &ibs[i];

			/* drop preamble IBs if we don't have a context switch */
			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
			    skip_preamble &&
			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
			    !amdgpu_mcbp &&
			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
				continue;

                        ...
			amdgpu_ring_emit_ib(ring, job, ib, status);

Thanks,
Ray
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: Fix per-IB secure flag GFX hang
  2020-02-27 11:56         ` Huang Rui
@ 2020-02-27 21:15           ` Luben Tuikov
  2020-02-27 21:30             ` Alex Deucher
  0 siblings, 1 reply; 12+ messages in thread
From: Luben Tuikov @ 2020-02-27 21:15 UTC (permalink / raw)
  To: Huang Rui
  Cc: Deucher, Alexander, Pelloux-prayer, Pierre-eric, Koenig,
	Christian, amd-gfx, Liu, Aaron

On 2020-02-27 6:56 a.m., Huang Rui wrote:
> On Thu, Feb 27, 2020 at 06:39:03AM +0800, Tuikov, Luben wrote:
>> Since commit "Move to a per-IB secure flag (TMZ)",
>> we've been seeing hangs in GFX. Ray H. pointed out
>> by sending a patch that we need to send FRAME
>> CONTROL stop/start back-to-back, every time we
>> flip the TMZ flag as per each IB we submit. That
>> is, when we transition from TMZ to non-TMZ we have
>> to send a stop with TMZ followed by a start with
>> non-TMZ, and similarly for transitioning from
>> non-TMZ into TMZ.
>>
>> This patch implements this, thus fixing the GFX
>> hang.
>>
>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 87 +++++++++++++++++-------
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +-
>>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 ++--
>>  4 files changed, 79 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 4b2342d11520..16d6df3304d3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -216,40 +216,75 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>  		amdgpu_ring_emit_cntxcntl(ring, status);
>>  	}
>>  
>> -	secure = false;
>> +	/* Find the first non-preamble IB.
>> +	 */
>>  	for (i = 0; i < num_ibs; ++i) {
>>  		ib = &ibs[i];
>>  
>>  		/* drop preamble IBs if we don't have a context switch */
>> -		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
>> -		    skip_preamble &&
>> -		    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
>> -		    !amdgpu_mcbp &&
>> -		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>> -			continue;
>> -
>> -		/* If this IB is TMZ, add frame TMZ start packet,
>> -		 * else, turn off TMZ.
>> -		 */
>> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
>> -			if (!secure) {
>> -				secure = true;
>> -				amdgpu_ring_emit_tmz(ring, true);
>> -			}
>> -		} else if (secure) {
>> +		if (!(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) ||
>> +		    !skip_preamble ||
>> +		    (status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) ||
>> +		    amdgpu_mcbp ||
>> +		    amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>> +			break;
>> +	}
>> +	if (i >= num_ibs)
>> +		goto Done;
>> +	/* Setup initial TMZiness and send it off.
>> +	 */
>> +	secure = false;
>> +	if (job && ring->funcs->emit_frame_cntl) {
>> +		if (ib->flags & AMDGPU_IB_FLAGS_SECURE)
>> +			secure = true;
>> +		else
>>  			secure = false;
>> -			amdgpu_ring_emit_tmz(ring, false);
>> -		}
>> -
>> -		amdgpu_ring_emit_ib(ring, job, ib, status);
>> -		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>> +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>  	}
>> +	amdgpu_ring_emit_ib(ring, job, ib, status);
>> +	status &= ~AMDGPU_HAVE_CTX_SWITCH;
>> +	i += 1;
>> +	/* Send the rest of the IBs.
>> +	 */
>> +	if (job && ring->funcs->emit_frame_cntl) {
>> +		for ( ; i < num_ibs; ++i) {
>> +			ib = &ibs[i];
>> +
>> +			/* drop preamble IBs if we don't have a context switch */
>> +			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
>> +			    skip_preamble &&
>> +			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
>> +			    !amdgpu_mcbp &&
>> +			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>> +				continue;
> 
> Snip.
> 
>> +
>> +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
>> +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
>> +				secure = !secure;
>> +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
>> +			}
> 
> That's pretty good optimization! I spend quit a few time to understand this.

I know. I know you did. It's called experience.

When I saw you v1, it was a cringe. Seriously?

> 
>>  
>> -	if (secure) {
>> -		secure = false;
>> -		amdgpu_ring_emit_tmz(ring, false);
>> +			amdgpu_ring_emit_ib(ring, job, ib, status);
>> +			status &= ~AMDGPU_HAVE_CTX_SWITCH;
>> +		}
>> +		amdgpu_ring_emit_frame_cntl(ring, false, secure);
>> +	} else {
>> +		for ( ; i < num_ibs; ++i) {
>> +			ib = &ibs[i];
>> +
>> +			/* drop preamble IBs if we don't have a context switch */
>> +			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
>> +			    skip_preamble &&
>> +			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
>> +			    !amdgpu_mcbp &&
>> +			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>> +				continue;
>> +
>> +			amdgpu_ring_emit_ib(ring, job, ib, status);
>> +			status &= ~AMDGPU_HAVE_CTX_SWITCH;
> 
> To pull the checking "job && ring->funcs->emit_frame_cntl" out of the loop,
> that make the implemenation more duplicated like below, we have to write

Yes, that is exactly what we want.

As I explained before and will explain again, and again, and again,
"job && ring->funcs->emit_frame_cntl" is static to the body of the loop,
and as such can be pulled OUT of the loop and it should.

This is a *formulaic* optimization exercise. Compilers and optimizers do it first.

To extrapolate, consider that what you'd eventually have is a HUGE long long loop
and everything inside it. Not good.

Regards,
Luben


> the same codes multiple times. I hope the implementation is more simple and
> readable. Please see my V2 patch that I just send out. We can try to use
> minimum changes to fix the issue.
> 
> 		for ( ; i < num_ibs; ++i) {
> 			ib = &ibs[i];
> 
> 			/* drop preamble IBs if we don't have a context switch */
> 			if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> 			    skip_preamble &&
> 			    !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> 			    !amdgpu_mcbp &&
> 			    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> 				continue;
> 
>                         ...
> 			amdgpu_ring_emit_ib(ring, job, ib, status);
> 
> Thanks,
> Ray
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: Fix per-IB secure flag GFX hang
  2020-02-27 21:15           ` Luben Tuikov
@ 2020-02-27 21:30             ` Alex Deucher
  0 siblings, 0 replies; 12+ messages in thread
From: Alex Deucher @ 2020-02-27 21:30 UTC (permalink / raw)
  To: Luben Tuikov
  Cc: Pelloux-prayer, Pierre-eric, Liu, Aaron, amd-gfx, Huang Rui,
	Deucher, Alexander, Koenig, Christian

On Thu, Feb 27, 2020 at 4:15 PM Luben Tuikov <luben.tuikov@amd.com> wrote:
>
> On 2020-02-27 6:56 a.m., Huang Rui wrote:
> > On Thu, Feb 27, 2020 at 06:39:03AM +0800, Tuikov, Luben wrote:
> >> Since commit "Move to a per-IB secure flag (TMZ)",
> >> we've been seeing hangs in GFX. Ray H. pointed out
> >> by sending a patch that we need to send FRAME
> >> CONTROL stop/start back-to-back, every time we
> >> flip the TMZ flag as per each IB we submit. That
> >> is, when we transition from TMZ to non-TMZ we have
> >> to send a stop with TMZ followed by a start with
> >> non-TMZ, and similarly for transitioning from
> >> non-TMZ into TMZ.
> >>
> >> This patch implements this, thus fixing the GFX
> >> hang.
> >>
> >> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 87 +++++++++++++++++-------
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +-
> >>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++--
> >>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 ++--
> >>  4 files changed, 79 insertions(+), 41 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >> index 4b2342d11520..16d6df3304d3 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> >> @@ -216,40 +216,75 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> >>              amdgpu_ring_emit_cntxcntl(ring, status);
> >>      }
> >>
> >> -    secure = false;
> >> +    /* Find the first non-preamble IB.
> >> +     */
> >>      for (i = 0; i < num_ibs; ++i) {
> >>              ib = &ibs[i];
> >>
> >>              /* drop preamble IBs if we don't have a context switch */
> >> -            if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> >> -                skip_preamble &&
> >> -                !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> >> -                !amdgpu_mcbp &&
> >> -                !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> >> -                    continue;
> >> -
> >> -            /* If this IB is TMZ, add frame TMZ start packet,
> >> -             * else, turn off TMZ.
> >> -             */
> >> -            if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
> >> -                    if (!secure) {
> >> -                            secure = true;
> >> -                            amdgpu_ring_emit_tmz(ring, true);
> >> -                    }
> >> -            } else if (secure) {
> >> +            if (!(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) ||
> >> +                !skip_preamble ||
> >> +                (status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) ||
> >> +                amdgpu_mcbp ||
> >> +                amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> >> +                    break;
> >> +    }
> >> +    if (i >= num_ibs)
> >> +            goto Done;
> >> +    /* Setup initial TMZiness and send it off.
> >> +     */
> >> +    secure = false;
> >> +    if (job && ring->funcs->emit_frame_cntl) {
> >> +            if (ib->flags & AMDGPU_IB_FLAGS_SECURE)
> >> +                    secure = true;
> >> +            else
> >>                      secure = false;
> >> -                    amdgpu_ring_emit_tmz(ring, false);
> >> -            }
> >> -
> >> -            amdgpu_ring_emit_ib(ring, job, ib, status);
> >> -            status &= ~AMDGPU_HAVE_CTX_SWITCH;
> >> +            amdgpu_ring_emit_frame_cntl(ring, true, secure);
> >>      }
> >> +    amdgpu_ring_emit_ib(ring, job, ib, status);
> >> +    status &= ~AMDGPU_HAVE_CTX_SWITCH;
> >> +    i += 1;
> >> +    /* Send the rest of the IBs.
> >> +     */
> >> +    if (job && ring->funcs->emit_frame_cntl) {
> >> +            for ( ; i < num_ibs; ++i) {
> >> +                    ib = &ibs[i];
> >> +
> >> +                    /* drop preamble IBs if we don't have a context switch */
> >> +                    if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> >> +                        skip_preamble &&
> >> +                        !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> >> +                        !amdgpu_mcbp &&
> >> +                        !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> >> +                            continue;
> >
> > Snip.
> >
> >> +
> >> +                    if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> >> +                            amdgpu_ring_emit_frame_cntl(ring, false, secure);
> >> +                            secure = !secure;
> >> +                            amdgpu_ring_emit_frame_cntl(ring, true, secure);
> >> +                    }
> >
> > That's pretty good optimization! I spend quit a few time to understand this.
>
> I know. I know you did. It's called experience.
>
> When I saw you v1, it was a cringe. Seriously?

It may be a good optimization but if it's hard to understand it makes
the code harder to read and comprehend, that can lead to
misinterpretation and maintenance burdens.  It took me a while to
understand what was intended here.

Alex

>
> >
> >>
> >> -    if (secure) {
> >> -            secure = false;
> >> -            amdgpu_ring_emit_tmz(ring, false);
> >> +                    amdgpu_ring_emit_ib(ring, job, ib, status);
> >> +                    status &= ~AMDGPU_HAVE_CTX_SWITCH;
> >> +            }
> >> +            amdgpu_ring_emit_frame_cntl(ring, false, secure);
> >> +    } else {
> >> +            for ( ; i < num_ibs; ++i) {
> >> +                    ib = &ibs[i];
> >> +
> >> +                    /* drop preamble IBs if we don't have a context switch */
> >> +                    if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> >> +                        skip_preamble &&
> >> +                        !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> >> +                        !amdgpu_mcbp &&
> >> +                        !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> >> +                            continue;
> >> +
> >> +                    amdgpu_ring_emit_ib(ring, job, ib, status);
> >> +                    status &= ~AMDGPU_HAVE_CTX_SWITCH;
> >
> > To pull the checking "job && ring->funcs->emit_frame_cntl" out of the loop,
> > that make the implemenation more duplicated like below, we have to write
>
> Yes, that is exactly what we want.
>
> As I explained before and will explain again, and again, and again,
> "job && ring->funcs->emit_frame_cntl" is static to the body of the loop,
> and as such can be pulled OUT of the loop and it should.
>
> This is a *formulaic* optimization exercise. Compilers and optimizers do it first.
>
> To extrapolate, consider that what you'd eventually have is a HUGE long long loop
> and everything inside it. Not good.
>
> Regards,
> Luben
>
>
> > the same codes multiple times. I hope the implementation is more simple and
> > readable. Please see my V2 patch that I just send out. We can try to use
> > minimum changes to fix the issue.
> >
> >               for ( ; i < num_ibs; ++i) {
> >                       ib = &ibs[i];
> >
> >                       /* drop preamble IBs if we don't have a context switch */
> >                       if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> >                           skip_preamble &&
> >                           !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
> >                           !amdgpu_mcbp &&
> >                           !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
> >                               continue;
> >
> >                         ...
> >                       amdgpu_ring_emit_ib(ring, job, ib, status);
> >
> > Thanks,
> > Ray
> >
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-02-27 21:30 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-25 13:57 [PATCH] drm/amdgpu: fix the gfx hang while use per-ib secure flag Huang Rui
2020-02-25 14:10 ` Pierre-Eric Pelloux-Prayer
2020-02-25 14:10 ` Alex Deucher
2020-02-25 14:30 ` Liu, Aaron
2020-02-25 22:56 ` Luben Tuikov
2020-02-25 23:40 ` Luben Tuikov
2020-02-26 16:26   ` Luben Tuikov
2020-02-26 22:39     ` [PATCH 0/1] Fixing the GFX hang Luben Tuikov
2020-02-26 22:39       ` [PATCH 1/1] drm/amdgpu: Fix per-IB secure flag " Luben Tuikov
2020-02-27 11:56         ` Huang Rui
2020-02-27 21:15           ` Luben Tuikov
2020-02-27 21:30             ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.