All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] fix dma frame of GFX8
@ 2016-08-25  5:58 Monk Liu
       [not found] ` <1472104695-2177-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Monk Liu @ 2016-08-25  5:58 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

original SWITCH_BUFFER scheme for GFX8 doesn't support virtualization, and
it give worse performance, now we use the classic scheme to support 
virtualization and gain more performance by only inserting one SWITCH_BUFFER
at the bottom of each command submit, and doing this requires 128dw NOP inserted
after VM flush as well to prevent CE trigger vm fault before De vm_flush done.

now 256 dw for each command submit is enough.

Monk Liu (2):
  drm/amdgpu:fix gfx ib schedule
  drm/amdgpu:256dw is enough for one ib_schedule

 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 10 +++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +++++++++++++---------------
 3 files changed, 24 insertions(+), 16 deletions(-)

-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
       [not found] ` <1472104695-2177-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-25  5:58   ` Monk Liu
       [not found]     ` <1472104695-2177-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2016-08-25  5:58   ` [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule Monk Liu
  1 sibling, 1 reply; 9+ messages in thread
From: Monk Liu @ 2016-08-25  5:58 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

for GFX 8, originally we use double switch_buffer to
prevents CE go ahead of DE, thus it can avoid VM fault
in case of VM_flush not finish. but double SWITCH_BUFFER
drops performance, and world switch preemption requires
that only one SWITCH_BUFFER is needed at the end of
DMAframe.

to Pevent CE vm fault without double switch_buffer,
we need to insert 128 dw NOP after vm flush because
CE and DE can have at most 128 DW gap from hw perspective.

Even no context switch, SWITCH_BUFFER is encouraged to use
to get better CE performance.

Change-Id: Ifdabf23f97e74156c1f66dddd0918ea7ffcddb20
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +++++++++++++---------------
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cb0098a..a935831 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
 	void (*end_use)(struct amdgpu_ring *ring);
 	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
 	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
+	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 };
 
 /*
@@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
 #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a31d7ef..4e5b2f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
 	ring->current_ctx = ctx;
+
+	/* Insert one SB at the end of DMAframe,
+	 * Now only GFX8 ip need handling like this, CI won't
+	 * insert one SB at this place, instead it insert double
+	 * switch buffer after VM FLUSH and PIPELINE sync.
+	 */
+	if (ring->funcs->emit_switch_buffer)
+		amdgpu_ring_emit_switch_buffer(ring);
 	amdgpu_ring_commit(ring);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index dfa2288..41a2ef1 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 {
 	u32 header, control = 0;
 
-	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
-	if (ctx_switch) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-	}
-
 	if (ib->flags & AMDGPU_IB_FLAG_CE)
 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
 	else
@@ -6013,11 +6007,9 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 4); /* poll interval */
 
 	if (usepfp) {
-		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
+		/* sync PFP to ME, otherwise we might get invalid PFP reads */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+		amdgpu_ring_write(ring, 0x0);
 	}
 }
 
@@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
 	}
+
+	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
+	amdgpu_ring_insert_nop(ring, 128);
 }
 
 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
@@ -6170,6 +6161,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct amdgpu_ring *ring, u32 idx, u32 v
 	amdgpu_ring_write(ring, val);
 }
 
+static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6477,6 +6474,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_switch_buffer = gfx_v8_ring_emit_sb,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
       [not found] ` <1472104695-2177-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2016-08-25  5:58   ` [PATCH 1/2] drm/amdgpu:fix gfx ib schedule Monk Liu
@ 2016-08-25  5:58   ` Monk Liu
       [not found]     ` <1472104695-2177-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Monk Liu @ 2016-08-25  5:58 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 4e5b2f3..b82904c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return -EINVAL;
 	}
 
-	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
+	r = amdgpu_ring_alloc(ring, 256);
 	if (r) {
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
       [not found]     ` <1472104695-2177-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-25  8:12       ` Christian König
       [not found]         ` <41466125-a69e-07c7-bb18-153c841c1d48-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2016-08-25  8:12 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Well userspace is allowed to send any number of IBs down to this if I 
remember correctly.

So we clearly need something depending on the number of IBs or reject 
submissions with to many IBs earlier in the CS.

Otherwise we clearly open up a possible problem where userspace can 
trigger a ring overrun.

Regards,
Christian.

Am 25.08.2016 um 07:58 schrieb Monk Liu:
> Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4e5b2f3..b82904c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		return -EINVAL;
>   	}
>   
> -	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
> +	r = amdgpu_ring_alloc(ring, 256);
>   	if (r) {
>   		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
>   		return r;


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
       [not found]         ` <41466125-a69e-07c7-bb18-153c841c1d48-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-25  8:28           ` Liu, Monk
       [not found]             ` <MWHPR12MB11821CF6BAF190E13914704984ED0-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Liu, Monk @ 2016-08-25  8:28 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

But even user space will submit 10 ibs this submission, the calculate of 256*ibs is totally overflow, remember that ring buffer is only 4kb size

-----Original Message-----
From: Christian König [mailto:deathsimple@vodafone.de] 
Sent: Thursday, August 25, 2016 4:12 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule

Well userspace is allowed to send any number of IBs down to this if I remember correctly.

So we clearly need something depending on the number of IBs or reject submissions with to many IBs earlier in the CS.

Otherwise we clearly open up a possible problem where userspace can trigger a ring overrun.

Regards,
Christian.

Am 25.08.2016 um 07:58 schrieb Monk Liu:
> Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 4e5b2f3..b82904c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		return -EINVAL;
>   	}
>   
> -	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
> +	r = amdgpu_ring_alloc(ring, 256);
>   	if (r) {
>   		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
>   		return r;


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
       [not found]             ` <MWHPR12MB11821CF6BAF190E13914704984ED0-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2016-08-25  8:30               ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2016-08-25  8:30 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

True indeed. Something like 256 + 32 * num_ibs (or even 16 * num_ibs??? 
Need to double check) should do as well.

Christian.

Am 25.08.2016 um 10:28 schrieb Liu, Monk:
> But even user space will submit 10 ibs this submission, the calculate of 256*ibs is totally overflow, remember that ring buffer is only 4kb size
>
> -----Original Message-----
> From: Christian König [mailto:deathsimple@vodafone.de]
> Sent: Thursday, August 25, 2016 4:12 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule
>
> Well userspace is allowed to send any number of IBs down to this if I remember correctly.
>
> So we clearly need something depending on the number of IBs or reject submissions with to many IBs earlier in the CS.
>
> Otherwise we clearly open up a possible problem where userspace can trigger a ring overrun.
>
> Regards,
> Christian.
>
> Am 25.08.2016 um 07:58 schrieb Monk Liu:
>> Change-Id: I1ee3258276868a753e536ae2d9ae1b12e7eaf791
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
>>    1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 4e5b2f3..b82904c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -151,7 +151,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>    		return -EINVAL;
>>    	}
>>    
>> -	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
>> +	r = amdgpu_ring_alloc(ring, 256);
>>    	if (r) {
>>    		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
>>    		return r;
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
       [not found]     ` <1472104695-2177-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-25 13:32       ` Deucher, Alexander
       [not found]         ` <MWHPR12MB1694872A4032E24D0D45ACC9F7ED0-Gy0DoCVfaSW4WA4dJ5YXGAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Deucher, Alexander @ 2016-08-25 13:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Thursday, August 25, 2016 1:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> 
> for GFX 8, originally we use double switch_buffer to
> prevents CE go ahead of DE, thus it can avoid VM fault
> in case of VM_flush not finish. but double SWITCH_BUFFER
> drops performance, and world switch preemption requires
> that only one SWITCH_BUFFER is needed at the end of
> DMAframe.
> 
> to Pevent CE vm fault without double switch_buffer,
> we need to insert 128 dw NOP after vm flush because
> CE and DE can have at most 128 DW gap from hw perspective.
> 
> Even no context switch, SWITCH_BUFFER is encouraged to use
> to get better CE performance.
> 
> Change-Id: Ifdabf23f97e74156c1f66dddd0918ea7ffcddb20
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++++++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +++++++++++++-------------

What about gfx7?  Does that need similar fixes?

Alex

> --
>  3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>  	void (*end_use)(struct amdgpu_ring *ring);
>  	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset,
> uint32_t val);
>  	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> +	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  };
> 
>  /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring
> *ring)
>  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs-
> >emit_hdp_invalidate((r))
>  #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
>  #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs-
> >emit_switch_buffer((r))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs-
> >patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..4e5b2f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs,
>  		amdgpu_ring_patch_cond_exec(ring, patch_offset);
> 
>  	ring->current_ctx = ctx;
> +
> +	/* Insert one SB at the end of DMAframe,
> +	 * Now only GFX8 ip need handling like this, CI won't
> +	 * insert one SB at this place, instead it insert double
> +	 * switch buffer after VM FLUSH and PIPELINE sync.
> +	 */
> +	if (ring->funcs->emit_switch_buffer)
> +		amdgpu_ring_emit_switch_buffer(ring);
>  	amdgpu_ring_commit(ring);
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..41a2ef1 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct
> amdgpu_ring *ring,
>  {
>  	u32 header, control = 0;
> 
> -	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
> -	if (ctx_switch) {
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
> -
>  	if (ib->flags & AMDGPU_IB_FLAG_CE)
>  		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>  	else
> @@ -6013,11 +6007,9 @@ static void
> gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  	amdgpu_ring_write(ring, 4); /* poll interval */
> 
>  	if (usepfp) {
> -		/* synce CE with ME to prevent CE fetch CEIB before context
> switch done */
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> +		/* sync PFP to ME, otherwise we might get invalid PFP reads
> */
> +		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
> +		amdgpu_ring_write(ring, 0x0);
>  	}
>  }
> 
> @@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>  		/* sync PFP to ME, otherwise we might get invalid PFP reads
> */
>  		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
>  		amdgpu_ring_write(ring, 0x0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
>  	}
> +
> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush
> finish */
> +	amdgpu_ring_insert_nop(ring, 128);
>  }
> 
>  static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> @@ -6170,6 +6161,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct
> amdgpu_ring *ring, u32 idx, u32 v
>  	amdgpu_ring_write(ring, val);
>  }
> 
> +static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
> +{
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> +	amdgpu_ring_write(ring, 0);
> +}
> +
>  static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device
> *adev,
>  						 enum
> amdgpu_interrupt_state state)
>  {
> @@ -6477,6 +6474,7 @@ static const struct amdgpu_ring_funcs
> gfx_v8_0_ring_funcs_gfx = {
>  	.test_ib = gfx_v8_0_ring_test_ib,
>  	.insert_nop = amdgpu_ring_insert_nop,
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_switch_buffer = gfx_v8_ring_emit_sb,
>  };
> 
>  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> --
> 1.9.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
       [not found]         ` <MWHPR12MB1694872A4032E24D0D45ACC9F7ED0-Gy0DoCVfaSW4WA4dJ5YXGAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2016-08-26  3:01           ` Liu, Monk
       [not found]             ` <MWHPR12MB1182D7D62658692C1F90D64984EC0-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Liu, Monk @ 2016-08-26  3:01 UTC (permalink / raw)
  To: Deucher, Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Not needed at least in windows kmd, I don't know if it is out of what reason, with my guess:

1, CI doesn’t support world switch, so no need for the SWITCH_BUFFER at end (only one SB at end is for world switch).
2, I don’t know if that limit can also applied on CI (limit is CE can at most ahead of DE by a certain number DW )

So I think CE can keep the original logic.

BR Monk


-----Original Message-----
From: Deucher, Alexander 
Sent: Thursday, August 25, 2016 9:33 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk <Monk.Liu@amd.com>
Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf 
> Of Monk Liu
> Sent: Thursday, August 25, 2016 1:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> 
> for GFX 8, originally we use double switch_buffer to prevents CE go 
> ahead of DE, thus it can avoid VM fault in case of VM_flush not 
> finish. but double SWITCH_BUFFER drops performance, and world switch 
> preemption requires that only one SWITCH_BUFFER is needed at the end 
> of DMAframe.
> 
> to Pevent CE vm fault without double switch_buffer, we need to insert 
> 128 dw NOP after vm flush because CE and DE can have at most 128 DW 
> gap from hw perspective.
> 
> Even no context switch, SWITCH_BUFFER is encouraged to use to get 
> better CE performance.
> 
> Change-Id: Ifdabf23f97e74156c1f66dddd0918ea7ffcddb20
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++++++++  
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +++++++++++++-------------

What about gfx7?  Does that need similar fixes?

Alex

> --
>  3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>  	void (*end_use)(struct amdgpu_ring *ring);
>  	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, 
> uint32_t val);
>  	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> +	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  };
> 
>  /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring
> *ring)
>  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs-
> >emit_hdp_invalidate((r))
>  #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), 
> (i), (v))  #define amdgpu_ring_emit_rreg(r, i) 
> (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs-
> >emit_switch_buffer((r))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))  
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))  
> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs-
> >patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..4e5b2f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> unsigned num_ibs,
>  		amdgpu_ring_patch_cond_exec(ring, patch_offset);
> 
>  	ring->current_ctx = ctx;
> +
> +	/* Insert one SB at the end of DMAframe,
> +	 * Now only GFX8 ip need handling like this, CI won't
> +	 * insert one SB at this place, instead it insert double
> +	 * switch buffer after VM FLUSH and PIPELINE sync.
> +	 */
> +	if (ring->funcs->emit_switch_buffer)
> +		amdgpu_ring_emit_switch_buffer(ring);
>  	amdgpu_ring_commit(ring);
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..41a2ef1 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct 
> amdgpu_ring *ring,  {
>  	u32 header, control = 0;
> 
> -	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
> -	if (ctx_switch) {
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
> -
>  	if (ib->flags & AMDGPU_IB_FLAG_CE)
>  		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>  	else
> @@ -6013,11 +6007,9 @@ static void
> gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  	amdgpu_ring_write(ring, 4); /* poll interval */
> 
>  	if (usepfp) {
> -		/* synce CE with ME to prevent CE fetch CEIB before context
> switch done */
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> +		/* sync PFP to ME, otherwise we might get invalid PFP reads
> */
> +		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
> +		amdgpu_ring_write(ring, 0x0);
>  	}
>  }
> 
> @@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>  		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>  		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>  		amdgpu_ring_write(ring, 0x0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
>  	}
> +
> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush
> finish */
> +	amdgpu_ring_insert_nop(ring, 128);
>  }
> 
>  static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 
> @@ -6170,6 +6161,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct
> amdgpu_ring *ring, u32 idx, u32 v
>  	amdgpu_ring_write(ring, val);
>  }
> 
> +static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) {
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> +	amdgpu_ring_write(ring, 0);
> +}
> +
>  static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device 
> *adev,
>  						 enum
> amdgpu_interrupt_state state)
>  {
> @@ -6477,6 +6474,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v8_0_ring_funcs_gfx = {
>  	.test_ib = gfx_v8_0_ring_test_ib,
>  	.insert_nop = amdgpu_ring_insert_nop,
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_switch_buffer = gfx_v8_ring_emit_sb,
>  };
> 
>  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> --
> 1.9.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
       [not found]             ` <MWHPR12MB1182D7D62658692C1F90D64984EC0-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2016-08-26  3:39               ` Liu, Monk
  0 siblings, 0 replies; 9+ messages in thread
From: Liu, Monk @ 2016-08-26  3:39 UTC (permalink / raw)
  To: Liu, Monk, Deucher, Alexander, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

But for VI. My patch will reduce those 5 redundant SB and one SB can always use ping-pong buffer, 
so the performance looks raised around 5%.

BR Monk

-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Liu, Monk
Sent: Friday, August 26, 2016 11:01 AM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

Not needed at least in windows kmd, I don't know if it is out of what reason, with my guess:

1, CI doesn’t support world switch, so no need for the SWITCH_BUFFER at end (only one SB at end is for world switch).
2, I don’t know if that limit can also applied on CI (limit is CE can at most ahead of DE by a certain number DW )

So I think CE can keep the original logic.

BR Monk


-----Original Message-----
From: Deucher, Alexander
Sent: Thursday, August 25, 2016 9:33 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk <Monk.Liu@amd.com>
Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf 
> Of Monk Liu
> Sent: Thursday, August 25, 2016 1:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> 
> for GFX 8, originally we use double switch_buffer to prevents CE go 
> ahead of DE, thus it can avoid VM fault in case of VM_flush not 
> finish. but double SWITCH_BUFFER drops performance, and world switch 
> preemption requires that only one SWITCH_BUFFER is needed at the end 
> of DMAframe.
> 
> to Pevent CE vm fault without double switch_buffer, we need to insert
> 128 dw NOP after vm flush because CE and DE can have at most 128 DW 
> gap from hw perspective.
> 
> Even no context switch, SWITCH_BUFFER is encouraged to use to get 
> better CE performance.
> 
> Change-Id: Ifdabf23f97e74156c1f66dddd0918ea7ffcddb20
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++++++++ 
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +++++++++++++-------------

What about gfx7?  Does that need similar fixes?

Alex

> --
>  3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>  	void (*end_use)(struct amdgpu_ring *ring);
>  	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, 
> uint32_t val);
>  	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> +	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  };
> 
>  /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring
> *ring)
>  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs-
> >emit_hdp_invalidate((r))
>  #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), 
> (i), (v))  #define amdgpu_ring_emit_rreg(r, i) 
> (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs-
> >emit_switch_buffer((r))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) 
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) 
> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs-
> >patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..4e5b2f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> unsigned num_ibs,
>  		amdgpu_ring_patch_cond_exec(ring, patch_offset);
> 
>  	ring->current_ctx = ctx;
> +
> +	/* Insert one SB at the end of DMAframe,
> +	 * Now only GFX8 ip need handling like this, CI won't
> +	 * insert one SB at this place, instead it insert double
> +	 * switch buffer after VM FLUSH and PIPELINE sync.
> +	 */
> +	if (ring->funcs->emit_switch_buffer)
> +		amdgpu_ring_emit_switch_buffer(ring);
>  	amdgpu_ring_commit(ring);
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..41a2ef1 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct 
> amdgpu_ring *ring,  {
>  	u32 header, control = 0;
> 
> -	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
> -	if (ctx_switch) {
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
> -
>  	if (ib->flags & AMDGPU_IB_FLAG_CE)
>  		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>  	else
> @@ -6013,11 +6007,9 @@ static void
> gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  	amdgpu_ring_write(ring, 4); /* poll interval */
> 
>  	if (usepfp) {
> -		/* synce CE with ME to prevent CE fetch CEIB before context
> switch done */
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> +		/* sync PFP to ME, otherwise we might get invalid PFP reads
> */
> +		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
> +		amdgpu_ring_write(ring, 0x0);
>  	}
>  }
> 
> @@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>  		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>  		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>  		amdgpu_ring_write(ring, 0x0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
>  	}
> +
> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush
> finish */
> +	amdgpu_ring_insert_nop(ring, 128);
>  }
> 
>  static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 
> @@ -6170,6 +6161,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct
> amdgpu_ring *ring, u32 idx, u32 v
>  	amdgpu_ring_write(ring, val);
>  }
> 
> +static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) {
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> +	amdgpu_ring_write(ring, 0);
> +}
> +
>  static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device 
> *adev,
>  						 enum
> amdgpu_interrupt_state state)
>  {
> @@ -6477,6 +6474,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v8_0_ring_funcs_gfx = {
>  	.test_ib = gfx_v8_0_ring_test_ib,
>  	.insert_nop = amdgpu_ring_insert_nop,
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_switch_buffer = gfx_v8_ring_emit_sb,
>  };
> 
>  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> --
> 1.9.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-08-26  3:39 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-25  5:58 [PATCH 0/2] fix dma frame of GFX8 Monk Liu
     [not found] ` <1472104695-2177-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-25  5:58   ` [PATCH 1/2] drm/amdgpu:fix gfx ib schedule Monk Liu
     [not found]     ` <1472104695-2177-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-25 13:32       ` Deucher, Alexander
     [not found]         ` <MWHPR12MB1694872A4032E24D0D45ACC9F7ED0-Gy0DoCVfaSW4WA4dJ5YXGAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-08-26  3:01           ` Liu, Monk
     [not found]             ` <MWHPR12MB1182D7D62658692C1F90D64984EC0-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-08-26  3:39               ` Liu, Monk
2016-08-25  5:58   ` [PATCH 2/2] drm/amdgpu:256dw is enough for one ib_schedule Monk Liu
     [not found]     ` <1472104695-2177-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-25  8:12       ` Christian König
     [not found]         ` <41466125-a69e-07c7-bb18-153c841c1d48-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-25  8:28           ` Liu, Monk
     [not found]             ` <MWHPR12MB11821CF6BAF190E13914704984ED0-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-08-25  8:30               ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.