All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/amdgpu:add switch buffer to end of CS
@ 2016-08-29  2:55 Monk Liu
       [not found] ` <1472439337-8002-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Monk Liu @ 2016-08-29  2:55 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Sync the switch buffer scheme with the Windows KMD for gfx v8:
now always insert exactly one SWITCH_BUFFER at the
end of the CS.

Change-Id: Ief8539b2ad91ccb38b9adbfb54e27d8282f3a3bd
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 13 +++++++------
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cb0098a..a935831 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
 	void (*end_use)(struct amdgpu_ring *ring);
 	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
 	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
+	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 };
 
 /*
@@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
 #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a31d7ef..029ee79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -210,6 +210,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
 	ring->current_ctx = ctx;
+	if (ring->funcs->emit_switch_buffer)
+		amdgpu_ring_emit_switch_buffer(ring);
 	amdgpu_ring_commit(ring);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index dfa2288..26fced0 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 {
 	u32 header, control = 0;
 
-	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
-	if (ctx_switch) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-	}
-
 	if (ib->flags & AMDGPU_IB_FLAG_CE)
 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
 	else
@@ -6170,6 +6164,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct amdgpu_ring *ring, u32 idx, u32 v
 	amdgpu_ring_write(ring, val);
 }
 
+static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6477,6 +6477,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_switch_buffer = gfx_v8_ring_emit_sb,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found] ` <1472439337-8002-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-29  2:55   ` Monk Liu
       [not found]     ` <1472439337-8002-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2016-08-29  2:55   ` [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL Monk Liu
  2016-08-29  8:04   ` [PATCH 1/4] drm/amdgpu:add switch buffer to end of CS Christian König
  2 siblings, 1 reply; 17+ messages in thread
From: Monk Liu @ 2016-08-29  2:55 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

CE and DE can be at most 128 dw apart, so to sync
CE and DE we no longer need the double SWITCH_BUFFERs,
which are ugly and hurt performance; we only need to
insert 128 NOPs after the VM flush to prevent a CE VM fault.

Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 26fced0..ce1e616 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6005,14 +6005,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, seq);
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, 4); /* poll interval */
-
-	if (usepfp) {
-		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-	}
 }
 
 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
@@ -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
 
+	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
+	amdgpu_ring_insert_nop(ring, 128);
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)) |
@@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
 	}
+
+	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
+	amdgpu_ring_insert_nop(ring, 128);
 }
 
 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL
       [not found] ` <1472439337-8002-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2016-08-29  2:55   ` [PATCH 2/4] drm/amdgpu:new method to sync ce&de Monk Liu
@ 2016-08-29  2:55   ` Monk Liu
       [not found]     ` <1472439337-8002-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2016-08-29  8:04   ` [PATCH 1/4] drm/amdgpu:add switch buffer to end of CS Christian König
  2 siblings, 1 reply; 17+ messages in thread
From: Monk Liu @ 2016-08-29  2:55 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Use the CONTEXT_CONTROL packet to dynamically skip
the preamble IB and other load_xxx commands in the sequence.

Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  9 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9132719..a9dfeb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -339,6 +339,7 @@ struct amdgpu_ring_funcs {
 	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
 	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
 	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 };
 
 /*
@@ -1050,6 +1051,7 @@ struct amdgpu_ctx {
 	spinlock_t		ring_lock;
 	struct fence            **fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
+	bool preamble_presented;
 };
 
 struct amdgpu_ctx_mgr {
@@ -1320,8 +1322,13 @@ struct amdgpu_cs_parser {
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
+	bool preamble_present; /* True means this command submit involves a preamble IB */
 };
 
+#define PREAMBLE_IB_PRESENT 		(1 << 0) /* bit set means command submit involves a preamble IB */
+#define PREAMBLE_IB_PRESENT_FIRST	(1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define HAVE_CTX_SWITCH		(1 << 2) /* bit set means context switch occured */
+
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
@@ -1330,6 +1337,7 @@ struct amdgpu_job {
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
 	struct fence		*fence; /* the hw fence */
+	uint32_t		preamble_status;
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
@@ -2374,6 +2382,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
 #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 78d3831..f2d739a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -711,6 +711,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
+		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->preamble_present = true;
+
 		if (parser->job->ring && parser->job->ring != ring)
 			return -EINVAL;
 
@@ -849,6 +852,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		return r;
 	}
 
+	if (p->preamble_present) {
+		job->preamble_status |= PREAMBLE_IB_PRESENT;
+		if (!p->ctx->preamble_presented)
+			job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
+	}
+
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
 	p->fence = fence_get(&job->base.s_fence->finished);
@@ -859,6 +868,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
+	if (p->preamble_present)
+		p->ctx->preamble_presented = true;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index df379c7..7c501ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
-	bool skip_preamble, need_ctx_switch;
+	bool need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
+	uint32_t status = 0;
 
 	unsigned i;
 	int r = 0;
@@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	/* always set cond_exec_polling to CONTINUE */
 	*ring->cond_exe_cpu_addr = 1;
 
-	skip_preamble = ring->current_ctx == fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
+	if (job && ring->funcs->emit_cntxcntl) {
+		if (need_ctx_switch)
+			status |= HAVE_CTX_SWITCH;
+		status |= job->preamble_status;
+		amdgpu_ring_emit_cntxcntl(ring, status);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
-
-		/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
-			continue;
-
 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
 				    need_ctx_switch);
 		need_ctx_switch = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ce1e616..8f6d860 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6164,6 +6164,36 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+	uint32_t dw2 = 0;
+
+	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+	if (flags & HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		dw2 |= 0x8001;
+		/* set load_cs_sh_regs */
+		dw2 |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs for GFX */
+		if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+			dw2 |= 0x10002;
+
+		/* set load_ce_ram if preamble presented */
+		if (PREAMBLE_IB_PRESENT & flags)
+			dw2 |= 0x10000000;
+	} else {
+		/* still load_ce_ram if this is the first time preamble presented
+		 * although there is no context switch happens.
+		 */
+		if (PREAMBLE_IB_PRESENT_FIRST & flags)
+			dw2 |= 0x10000000;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, dw2);
+	amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6472,6 +6502,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -6490,6 +6521,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu:add switch buffer to end of CS
       [not found] ` <1472439337-8002-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2016-08-29  2:55   ` [PATCH 2/4] drm/amdgpu:new method to sync ce&de Monk Liu
  2016-08-29  2:55   ` [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL Monk Liu
@ 2016-08-29  8:04   ` Christian König
  2 siblings, 0 replies; 17+ messages in thread
From: Christian König @ 2016-08-29  8:04 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 29.08.2016 um 04:55 schrieb Monk Liu:
> sync switch buffer scheme with windows kmd for gfx v8,
> Now always and only insert one switch buffer to the
> end of CS.
>
> Change-Id: Ief8539b2ad91ccb38b9adbfb54e27d8282f3a3bd
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

This patch doesn't seem to apply to any of the public branches; please
make sure to rebase on top of amd-staging-4.6 before sending it out.

Apart from this the patch is Reviewed-by: Christian König 
<christian.koenig@amd.com>.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  2 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 13 +++++++------
>   3 files changed, 11 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>   	void (*end_use)(struct amdgpu_ring *ring);
>   	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
>   	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> +	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>   };
>   
>   /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
>   #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
>   #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
>   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>   #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..029ee79 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		amdgpu_ring_patch_cond_exec(ring, patch_offset);
>   
>   	ring->current_ctx = ctx;
> +	if (ring->funcs->emit_switch_buffer)
> +		amdgpu_ring_emit_switch_buffer(ring);
>   	amdgpu_ring_commit(ring);
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..26fced0 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>   {
>   	u32 header, control = 0;
>   
> -	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
> -	if (ctx_switch) {
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
> -
>   	if (ib->flags & AMDGPU_IB_FLAG_CE)
>   		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>   	else
> @@ -6170,6 +6164,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct amdgpu_ring *ring, u32 idx, u32 v
>   	amdgpu_ring_write(ring, val);
>   }
>   
> +static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
> +{
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> +	amdgpu_ring_write(ring, 0);
> +}
> +
>   static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   						 enum amdgpu_interrupt_state state)
>   {
> @@ -6477,6 +6477,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
>   	.test_ib = gfx_v8_0_ring_test_ib,
>   	.insert_nop = amdgpu_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_switch_buffer = gfx_v8_ring_emit_sb,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]     ` <1472439337-8002-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-29  8:05       ` Christian König
       [not found]         ` <283cbf94-05bf-4d30-c03f-731dd34ac653-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2016-08-29 13:21       ` Deucher, Alexander
  1 sibling, 1 reply; 17+ messages in thread
From: Christian König @ 2016-08-29  8:05 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 29.08.2016 um 04:55 schrieb Monk Liu:
> CE & DE can have most up to 128dw as the gap between them
> so to sync CE nad DE we don't need double SWITCH_BUFFERs any
> more, which is urgly and harm performance, we only need
> insert 128NOP after VM flush to prevent CE vm fault.
>
> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Looks good to me, but only the GFX engines have a CE. So syncing on the 
compute engines is pretty much pointless.

So I suggest that you move this into the "usepfp" if branch as well.

With that fixed the patch is Reviewed-by: Christian König 
<christian.koenig@amd.com>.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
>   1 file changed, 6 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 26fced0..ce1e616 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6005,14 +6005,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>   	amdgpu_ring_write(ring, seq);
>   	amdgpu_ring_write(ring, 0xffffffff);
>   	amdgpu_ring_write(ring, 4); /* poll interval */
> -
> -	if (usepfp) {
> -		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
>   }
>   
>   static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> @@ -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   {
>   	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
>   
> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
> +	amdgpu_ring_insert_nop(ring, 128);
> +
>   	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>   	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>   				 WRITE_DATA_DST_SEL(0)) |
> @@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>   		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>   		amdgpu_ring_write(ring, 0x0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
>   	}
> +
> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
> +	amdgpu_ring_insert_nop(ring, 128);
>   }
>   
>   static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL
       [not found]     ` <1472439337-8002-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-29  8:10       ` Christian König
       [not found]         ` <106871c7-6405-99c9-4f42-beb0be317b2d-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Christian König @ 2016-08-29  8:10 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 29.08.2016 um 04:55 schrieb Monk Liu:
> use CONTEXT_CONTROL package to dynamically skip
> preamble IB and other load_xxx command in sequence.
>
> Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Again, please rebase on top of amd-staging-4.6. Apart from that I need 
to take a closer look later today.

BTW: Where is patch #3?

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  9 +++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++-------
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 32 ++++++++++++++++++++++++++++++++
>   4 files changed, 62 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9132719..a9dfeb5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -339,6 +339,7 @@ struct amdgpu_ring_funcs {
>   	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
>   	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
>   	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
> +	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
>   };
>   
>   /*
> @@ -1050,6 +1051,7 @@ struct amdgpu_ctx {
>   	spinlock_t		ring_lock;
>   	struct fence            **fences;
>   	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
> +	bool preamble_presented;
>   };
>   
>   struct amdgpu_ctx_mgr {
> @@ -1320,8 +1322,13 @@ struct amdgpu_cs_parser {
>   
>   	/* user fence */
>   	struct amdgpu_bo_list_entry	uf_entry;
> +	bool preamble_present; /* True means this command submit involves a preamble IB */
>   };
>   
> +#define PREAMBLE_IB_PRESENT 		(1 << 0) /* bit set means command submit involves a preamble IB */
> +#define PREAMBLE_IB_PRESENT_FIRST	(1 << 1) /* bit set means preamble IB is first presented in belonging context */
> +#define HAVE_CTX_SWITCH		(1 << 2) /* bit set means context switch occured */
> +
>   struct amdgpu_job {
>   	struct amd_sched_job    base;
>   	struct amdgpu_device	*adev;
> @@ -1330,6 +1337,7 @@ struct amdgpu_job {
>   	struct amdgpu_sync	sync;
>   	struct amdgpu_ib	*ibs;
>   	struct fence		*fence; /* the hw fence */
> +	uint32_t		preamble_status;
>   	uint32_t		num_ibs;
>   	void			*owner;
>   	uint64_t		fence_ctx; /* the fence_context this job uses */
> @@ -2374,6 +2382,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
>   #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
>   #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
> +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
>   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>   #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 78d3831..f2d739a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -711,6 +711,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   		if (r)
>   			return r;
>   
> +		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> +			parser->preamble_present = true;
> +
>   		if (parser->job->ring && parser->job->ring != ring)
>   			return -EINVAL;
>   
> @@ -849,6 +852,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   		return r;
>   	}
>   
> +	if (p->preamble_present) {
> +		job->preamble_status |= PREAMBLE_IB_PRESENT;
> +		if (!p->ctx->preamble_presented)
> +			job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
> +	}
> +
>   	job->owner = p->filp;
>   	job->fence_ctx = entity->fence_context;
>   	p->fence = fence_get(&job->base.s_fence->finished);
> @@ -859,6 +868,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	trace_amdgpu_cs_ioctl(job);
>   	amd_sched_entity_push_job(&job->base);
>   
> +	if (p->preamble_present)
> +		p->ctx->preamble_presented = true;
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index df379c7..7c501ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   {
>   	struct amdgpu_device *adev = ring->adev;
>   	struct amdgpu_ib *ib = &ibs[0];
> -	bool skip_preamble, need_ctx_switch;
> +	bool need_ctx_switch;
>   	unsigned patch_offset = ~0;
>   	struct amdgpu_vm *vm;
>   	uint64_t fence_ctx;
> +	uint32_t status = 0;
>   
>   	unsigned i;
>   	int r = 0;
> @@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   	/* always set cond_exec_polling to CONTINUE */
>   	*ring->cond_exe_cpu_addr = 1;
>   
> -	skip_preamble = ring->current_ctx == fence_ctx;
>   	need_ctx_switch = ring->current_ctx != fence_ctx;
> +	if (job && ring->funcs->emit_cntxcntl) {
> +		if (need_ctx_switch)
> +			status |= HAVE_CTX_SWITCH;
> +		status |= job->preamble_status;
> +		amdgpu_ring_emit_cntxcntl(ring, status);
> +	}
> +
>   	for (i = 0; i < num_ibs; ++i) {
>   		ib = &ibs[i];
> -
> -		/* drop preamble IBs if we don't have a context switch */
> -		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
> -			continue;
> -
>   		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
>   				    need_ctx_switch);
>   		need_ctx_switch = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index ce1e616..8f6d860 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6164,6 +6164,36 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
>   	amdgpu_ring_write(ring, 0);
>   }
>   
> +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> +{
> +	uint32_t dw2 = 0;
> +
> +	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> +	if (flags & HAVE_CTX_SWITCH) {
> +		/* set load_global_config & load_global_uconfig */
> +		dw2 |= 0x8001;
> +		/* set load_cs_sh_regs */
> +		dw2 |= 0x01000000;
> +		/* set load_per_context_state & load_gfx_sh_regs for GFX */
> +		if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
> +			dw2 |= 0x10002;
> +
> +		/* set load_ce_ram if preamble presented */
> +		if (PREAMBLE_IB_PRESENT & flags)
> +			dw2 |= 0x10000000;
> +	} else {
> +		/* still load_ce_ram if this is the first time preamble presented
> +		 * although there is no context switch happens.
> +		 */
> +		if (PREAMBLE_IB_PRESENT_FIRST & flags)
> +			dw2 |= 0x10000000;
> +	}
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> +	amdgpu_ring_write(ring, dw2);
> +	amdgpu_ring_write(ring, 0);
> +}
> +
>   static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   						 enum amdgpu_interrupt_state state)
>   {
> @@ -6472,6 +6502,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
>   	.insert_nop = amdgpu_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_switch_buffer = gfx_v8_ring_emit_sb,
> +	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> @@ -6490,6 +6521,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
>   	.test_ib = gfx_v8_0_ring_test_ib,
>   	.insert_nop = amdgpu_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]         ` <283cbf94-05bf-4d30-c03f-731dd34ac653-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-29  8:14           ` Liu, Monk
       [not found]             ` <MWHPR12MB1182364E8ADFBB9B2AAFD47A84E10-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Liu, Monk @ 2016-08-29  8:14 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

No, the compute ring can also leverage the constant engine; that depends on the OCL UMD's behavior.
I just want to make sure the KMD does nothing wrong.

BR Monk

-----Original Message-----
From: Christian König [mailto:deathsimple@vodafone.de] 
Sent: Monday, August 29, 2016 4:06 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de

Am 29.08.2016 um 04:55 schrieb Monk Liu:
> CE & DE can have most up to 128dw as the gap between them so to sync 
> CE nad DE we don't need double SWITCH_BUFFERs any more, which is urgly 
> and harm performance, we only need insert 128NOP after VM flush to 
> prevent CE vm fault.
>
> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Looks good to me, but only the GFX engines have a CE. So syncing on the compute engines is pretty much pointless.

So I suggest that you move this into the "usepfp" if branch as well.

With that fixed the patch is Reviewed-by: Christian König <christian.koenig@amd.com>.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
>   1 file changed, 6 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 26fced0..ce1e616 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6005,14 +6005,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>   	amdgpu_ring_write(ring, seq);
>   	amdgpu_ring_write(ring, 0xffffffff);
>   	amdgpu_ring_write(ring, 4); /* poll interval */
> -
> -	if (usepfp) {
> -		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
>   }
>   
>   static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, @@ 
> -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   {
>   	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
>   
> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
> +	amdgpu_ring_insert_nop(ring, 128);
> +
>   	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>   	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>   				 WRITE_DATA_DST_SEL(0)) |
> @@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>   		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>   		amdgpu_ring_write(ring, 0x0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
>   	}
> +
> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
> +	amdgpu_ring_insert_nop(ring, 128);
>   }
>   
>   static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL
       [not found]         ` <106871c7-6405-99c9-4f42-beb0be317b2d-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-29  8:19           ` Liu, Monk
  0 siblings, 0 replies; 17+ messages in thread
From: Liu, Monk @ 2016-08-29  8:19 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Oh, sorry for the trouble,

I resent the patch series; the previous one had a wrong comment and lacked patch 3, although patch 3 (rename job->ctx) has already been reviewed.

BR Monk

-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Christian König
Sent: Monday, August 29, 2016 4:10 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL

Am 29.08.2016 um 04:55 schrieb Monk Liu:
> use CONTEXT_CONTROL package to dynamically skip preamble IB and other 
> load_xxx command in sequence.
>
> Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Again, please rebase on top of amd-staging-4.6. Apart from that I need to take a closer look later today.

BTW: Where is patch #3?

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  9 +++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++-------
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 32 ++++++++++++++++++++++++++++++++
>   4 files changed, 62 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9132719..a9dfeb5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -339,6 +339,7 @@ struct amdgpu_ring_funcs {
>   	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
>   	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
>   	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
> +	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
>   };
>   
>   /*
> @@ -1050,6 +1051,7 @@ struct amdgpu_ctx {
>   	spinlock_t		ring_lock;
>   	struct fence            **fences;
>   	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
> +	bool preamble_presented;
>   };
>   
>   struct amdgpu_ctx_mgr {
> @@ -1320,8 +1322,13 @@ struct amdgpu_cs_parser {
>   
>   	/* user fence */
>   	struct amdgpu_bo_list_entry	uf_entry;
> +	bool preamble_present; /* True means this command submit involves a 
> +preamble IB */
>   };
>   
> +#define PREAMBLE_IB_PRESENT 		(1 << 0) /* bit set means command submit involves a preamble IB */
> +#define PREAMBLE_IB_PRESENT_FIRST	(1 << 1) /* bit set means preamble IB is first presented in belonging context */
> +#define HAVE_CTX_SWITCH		(1 << 2) /* bit set means context switch occured */
> +
>   struct amdgpu_job {
>   	struct amd_sched_job    base;
>   	struct amdgpu_device	*adev;
> @@ -1330,6 +1337,7 @@ struct amdgpu_job {
>   	struct amdgpu_sync	sync;
>   	struct amdgpu_ib	*ibs;
>   	struct fence		*fence; /* the hw fence */
> +	uint32_t		preamble_status;
>   	uint32_t		num_ibs;
>   	void			*owner;
>   	uint64_t		fence_ctx; /* the fence_context this job uses */
> @@ -2374,6 +2382,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
>   #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
>   #define amdgpu_ring_emit_switch_buffer(r) 
> (r)->funcs->emit_switch_buffer((r))
> +#define amdgpu_ring_emit_cntxcntl(r, d) 
> +(r)->funcs->emit_cntxcntl((r), (d))
>   #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>   #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>   #define amdgpu_ring_patch_cond_exec(r,o) 
> (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 78d3831..f2d739a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -711,6 +711,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   		if (r)
>   			return r;
>   
> +		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> +			parser->preamble_present = true;
> +
>   		if (parser->job->ring && parser->job->ring != ring)
>   			return -EINVAL;
>   
> @@ -849,6 +852,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   		return r;
>   	}
>   
> +	if (p->preamble_present) {
> +		job->preamble_status |= PREAMBLE_IB_PRESENT;
> +		if (!p->ctx->preamble_presented)
> +			job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
> +	}
> +
>   	job->owner = p->filp;
>   	job->fence_ctx = entity->fence_context;
>   	p->fence = fence_get(&job->base.s_fence->finished);
> @@ -859,6 +868,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	trace_amdgpu_cs_ioctl(job);
>   	amd_sched_entity_push_job(&job->base);
>   
> +	if (p->preamble_present)
> +		p->ctx->preamble_presented = true;
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index df379c7..7c501ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   {
>   	struct amdgpu_device *adev = ring->adev;
>   	struct amdgpu_ib *ib = &ibs[0];
> -	bool skip_preamble, need_ctx_switch;
> +	bool need_ctx_switch;
>   	unsigned patch_offset = ~0;
>   	struct amdgpu_vm *vm;
>   	uint64_t fence_ctx;
> +	uint32_t status = 0;
>   
>   	unsigned i;
>   	int r = 0;
> @@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   	/* always set cond_exec_polling to CONTINUE */
>   	*ring->cond_exe_cpu_addr = 1;
>   
> -	skip_preamble = ring->current_ctx == fence_ctx;
>   	need_ctx_switch = ring->current_ctx != fence_ctx;
> +	if (job && ring->funcs->emit_cntxcntl) {
> +		if (need_ctx_switch)
> +			status |= HAVE_CTX_SWITCH;
> +		status |= job->preamble_status;
> +		amdgpu_ring_emit_cntxcntl(ring, status);
> +	}
> +
>   	for (i = 0; i < num_ibs; ++i) {
>   		ib = &ibs[i];
> -
> -		/* drop preamble IBs if we don't have a context switch */
> -		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
> -			continue;
> -
>   		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
>   				    need_ctx_switch);
>   		need_ctx_switch = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index ce1e616..8f6d860 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6164,6 +6164,36 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
>   	amdgpu_ring_write(ring, 0);
>   }
>   
> +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, 
> +uint32_t flags) {
> +	uint32_t dw2 = 0;
> +
> +	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> +	if (flags & HAVE_CTX_SWITCH) {
> +		/* set load_global_config & load_global_uconfig */
> +		dw2 |= 0x8001;
> +		/* set load_cs_sh_regs */
> +		dw2 |= 0x01000000;
> +		/* set load_per_context_state & load_gfx_sh_regs for GFX */
> +		if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
> +			dw2 |= 0x10002;
> +
> +		/* set load_ce_ram if preamble presented */
> +		if (PREAMBLE_IB_PRESENT & flags)
> +			dw2 |= 0x10000000;
> +	} else {
> +		/* still load_ce_ram if this is the first time preamble presented
> +		 * although there is no context switch happens.
> +		 */
> +		if (PREAMBLE_IB_PRESENT_FIRST & flags)
> +			dw2 |= 0x10000000;
> +	}
> +
> +	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> +	amdgpu_ring_write(ring, dw2);
> +	amdgpu_ring_write(ring, 0);
> +}
> +
>   static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   						 enum amdgpu_interrupt_state state)
>   {
> @@ -6472,6 +6502,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
>   	.insert_nop = amdgpu_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_switch_buffer = gfx_v8_ring_emit_sb,
> +	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = 
> { @@ -6490,6 +6521,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
>   	.test_ib = gfx_v8_0_ring_test_ib,
>   	.insert_nop = amdgpu_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]             ` <MWHPR12MB1182364E8ADFBB9B2AAFD47A84E10-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2016-08-29  8:25               ` Christian König
       [not found]                 ` <1c8cab37-70cb-3e17-b651-b3b6b2b5b8e0-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Christian König @ 2016-08-29  8:25 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hui what?

It feels like a hundred times I asked if the compute engine has a CE as 
well, but so far the answer was always No. That would explain a whole 
bunch of problems we had with the compute rings as well.

In this case the patch is good as it is, please just rebase it.

Christian.

Am 29.08.2016 um 10:14 schrieb Liu, Monk:
> No, compute ring also can leverage constant engines, that's depend on OCL umd behavior
> I just make sure KMD do nothing wrong
>
> BR Monk
>
> -----Original Message-----
> From: Christian König [mailto:deathsimple@vodafone.de]
> Sent: Monday, August 29, 2016 4:06 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
>
> Am 29.08.2016 um 04:55 schrieb Monk Liu:
>> CE & DE can have most up to 128dw as the gap between them so to sync
>> CE nad DE we don't need double SWITCH_BUFFERs any more, which is urgly
>> and harm performance, we only need insert 128NOP after VM flush to
>> prevent CE vm fault.
>>
>> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Looks good to me, but only the GFX engines have a CE. So syncing on the compute engines is pretty much pointless.
>
> So I suggest that you move this into the "usepfp" if branch as well.
>
> With that fixed the patch is Reviewed-by: Christian König <christian.koenig@amd.com>.
>
> Regards,
> Christian.
>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
>>    1 file changed, 6 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 26fced0..ce1e616 100755
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -6005,14 +6005,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>>    	amdgpu_ring_write(ring, seq);
>>    	amdgpu_ring_write(ring, 0xffffffff);
>>    	amdgpu_ring_write(ring, 4); /* poll interval */
>> -
>> -	if (usepfp) {
>> -		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>> -	}
>>    }
>>    
>>    static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, @@
>> -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>    {
>>    	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
>>    
>> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
>> +	amdgpu_ring_insert_nop(ring, 128);
>> +
>>    	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>    	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>>    				 WRITE_DATA_DST_SEL(0)) |
>> @@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>    		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>>    		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>>    		amdgpu_ring_write(ring, 0x0);
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>>    	}
>> +
>> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
>> +	amdgpu_ring_insert_nop(ring, 128);
>>    }
>>    
>>    static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]                 ` <1c8cab37-70cb-3e17-b651-b3b6b2b5b8e0-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-29  8:28                   ` Liu, Monk
       [not found]                     ` <MWHPR12MB118285051FDBA2381204FE0384E10-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2016-08-29 13:37                   ` Deucher, Alexander
  1 sibling, 1 reply; 17+ messages in thread
From: Liu, Monk @ 2016-08-29  8:28 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

But practically speaking: at least the closed-source OCL UMD doesn't use the CE at all, and I guess Mesa doesn't either ...

BR Monk

-----Original Message-----
From: Christian König [mailto:deathsimple@vodafone.de] 
Sent: Monday, August 29, 2016 4:25 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de

Hui what?

It feels like a hundred times I asked if the compute engine has a CE as well, but so far the answer was always No. That would explain a whole bunch of problems we had with the compute rings as well.

In this case the patch is good as it is, please just rebase it.

Christian.

Am 29.08.2016 um 10:14 schrieb Liu, Monk:
> No, compute ring also can leverage constant engines, that's depend on 
> OCL umd behavior I just make sure KMD do nothing wrong
>
> BR Monk
>
> -----Original Message-----
> From: Christian König [mailto:deathsimple@vodafone.de]
> Sent: Monday, August 29, 2016 4:06 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
>
> Am 29.08.2016 um 04:55 schrieb Monk Liu:
>> CE & DE can have most up to 128dw as the gap between them so to sync 
>> CE nad DE we don't need double SWITCH_BUFFERs any more, which is 
>> urgly and harm performance, we only need insert 128NOP after VM flush 
>> to prevent CE vm fault.
>>
>> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Looks good to me, but only the GFX engines have a CE. So syncing on the compute engines is pretty much pointless.
>
> So I suggest that you move this into the "usepfp" if branch as well.
>
> With that fixed the patch is Reviewed-by: Christian König <christian.koenig@amd.com>.
>
> Regards,
> Christian.
>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
>>    1 file changed, 6 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 26fced0..ce1e616 100755
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -6005,14 +6005,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>>    	amdgpu_ring_write(ring, seq);
>>    	amdgpu_ring_write(ring, 0xffffffff);
>>    	amdgpu_ring_write(ring, 4); /* poll interval */
>> -
>> -	if (usepfp) {
>> -		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>> -	}
>>    }
>>    
>>    static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 
>> @@
>> -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>    {
>>    	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
>>    
>> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
>> +	amdgpu_ring_insert_nop(ring, 128);
>> +
>>    	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>    	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>>    				 WRITE_DATA_DST_SEL(0)) |
>> @@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>    		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>>    		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>>    		amdgpu_ring_write(ring, 0x0);
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>> -		amdgpu_ring_write(ring, 0);
>>    	}
>> +
>> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
>> +	amdgpu_ring_insert_nop(ring, 128);
>>    }
>>    
>>    static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring 
>> *ring)
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]                     ` <MWHPR12MB118285051FDBA2381204FE0384E10-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2016-08-29  8:54                       ` Christian König
  0 siblings, 0 replies; 17+ messages in thread
From: Christian König @ 2016-08-29  8:54 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Well a perfect example of why we shouldn't take a look at what userspace 
does, but rather what the hardware can do when we design the IOCTLs.

I've been trying to raise awareness of this for quite a while now, but a lot 
of people seem to think that when the UMD doesn't do something, the kernel 
doesn't need to handle that situation.

Thanks for that info,
Christian.

Am 29.08.2016 um 10:28 schrieb Liu, Monk:
> But speaking with practice attitude: at least close source UMD OCL doesn't use CE at all, and I guess MESA either ...
>
> BR Monk
>
> -----Original Message-----
> From: Christian König [mailto:deathsimple@vodafone.de]
> Sent: Monday, August 29, 2016 4:25 PM
> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
>
> Hui what?
>
> It feels like a hundred times I asked if the compute engine has a CE as well, but so far the answer was always No. That would explain a whole bunch of problems we had with the compute rings as well.
>
> In this case the patch is good as it is, please just rebase it.
>
> Christian.
>
> Am 29.08.2016 um 10:14 schrieb Liu, Monk:
>> No, compute ring also can leverage constant engines, that's depend on
>> OCL umd behavior I just make sure KMD do nothing wrong
>>
>> BR Monk
>>
>> -----Original Message-----
>> From: Christian König [mailto:deathsimple@vodafone.de]
>> Sent: Monday, August 29, 2016 4:06 PM
>> To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
>>
>> Am 29.08.2016 um 04:55 schrieb Monk Liu:
>>> CE & DE can have most up to 128dw as the gap between them so to sync
>>> CE nad DE we don't need double SWITCH_BUFFERs any more, which is
>>> urgly and harm performance, we only need insert 128NOP after VM flush
>>> to prevent CE vm fault.
>>>
>>> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> Looks good to me, but only the GFX engines have a CE. So syncing on the compute engines is pretty much pointless.
>>
>> So I suggest that you move this into the "usepfp" if branch as well.
>>
>> With that fixed the patch is Reviewed-by: Christian König <christian.koenig@amd.com>.
>>
>> Regards,
>> Christian.
>>
>>> ---
>>>     drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
>>>     1 file changed, 6 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> index 26fced0..ce1e616 100755
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>>> @@ -6005,14 +6005,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>>>     	amdgpu_ring_write(ring, seq);
>>>     	amdgpu_ring_write(ring, 0xffffffff);
>>>     	amdgpu_ring_write(ring, 4); /* poll interval */
>>> -
>>> -	if (usepfp) {
>>> -		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>>> -		amdgpu_ring_write(ring, 0);
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>>> -		amdgpu_ring_write(ring, 0);
>>> -	}
>>>     }
>>>     
>>>     static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>> @@
>>> -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>>     {
>>>     	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
>>>     
>>> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
>>> +	amdgpu_ring_insert_nop(ring, 128);
>>> +
>>>     	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>>>     	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>>>     				 WRITE_DATA_DST_SEL(0)) |
>>> @@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>>     		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>>>     		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>>>     		amdgpu_ring_write(ring, 0x0);
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>>> -		amdgpu_ring_write(ring, 0);
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
>>> -		amdgpu_ring_write(ring, 0);
>>>     	}
>>> +
>>> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
>>> +	amdgpu_ring_insert_nop(ring, 128);
>>>     }
>>>     
>>>     static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring
>>> *ring)
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]     ` <1472439337-8002-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2016-08-29  8:05       ` Christian König
@ 2016-08-29 13:21       ` Deucher, Alexander
  1 sibling, 0 replies; 17+ messages in thread
From: Deucher, Alexander @ 2016-08-29 13:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Sunday, August 28, 2016 10:56 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
> 
> CE & DE can have most up to 128dw as the gap between them
> so to sync CE nad DE we don't need double SWITCH_BUFFERs any
> more, which is urgly and harm performance, we only need
> insert 128NOP after VM flush to prevent CE vm fault.
> 
> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
>  1 file changed, 6 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 26fced0..ce1e616 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6005,14 +6005,6 @@ static void
> gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  	amdgpu_ring_write(ring, seq);
>  	amdgpu_ring_write(ring, 0xffffffff);
>  	amdgpu_ring_write(ring, 4); /* poll interval */
> -
> -	if (usepfp) {
> -		/* synce CE with ME to prevent CE fetch CEIB before context
> switch done */
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
>  }
> 
>  static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> @@ -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>  {
>  	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
> 
> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE
> finish CEIB */
> +	amdgpu_ring_insert_nop(ring, 128);

Here and below we can make the nop insertion conditional on usepfp since there is no CE on the compute MEC.

Alex

> +
>  	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>  	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>  				 WRITE_DATA_DST_SEL(0)) |
> @@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>  		/* sync PFP to ME, otherwise we might get invalid PFP reads
> */
>  		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
>  		amdgpu_ring_write(ring, 0x0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
>  	}
> +
> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush
> finish */
> +	amdgpu_ring_insert_nop(ring, 128);
>  }
> 
>  static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> --
> 1.9.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]                 ` <1c8cab37-70cb-3e17-b651-b3b6b2b5b8e0-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2016-08-29  8:28                   ` Liu, Monk
@ 2016-08-29 13:37                   ` Deucher, Alexander
       [not found]                     ` <MWHPR12MB16948C33AC82C6566DBA5C5EF7E10-Gy0DoCVfaSW4WA4dJ5YXGAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  1 sibling, 1 reply; 17+ messages in thread
From: Deucher, Alexander @ 2016-08-29 13:37 UTC (permalink / raw)
  To: 'Christian König',
	Liu, Monk, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Christian König
> Sent: Monday, August 29, 2016 4:25 AM
> To: Liu, Monk; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
> 
> Hui what?
> 
> It feels like a hundred times I asked if the compute engine has a CE as
> well, but so far the answer was always No. That would explain a whole
> bunch of problems we had with the compute rings as well.

Can we get some clarification?  It doesn't have a dedicated CE as far as I know.  Can the CE be used in conjunction with the MEC as well as the PFP/ME?

Alex

> 
> In this case the patch is good as it is, please just rebase it.
> 
> Christian.
> 
> Am 29.08.2016 um 10:14 schrieb Liu, Monk:
> > No, the compute ring can also leverage constant engines; that depends on
> > the OCL UMD's behavior.
> > I just want to make sure the KMD does nothing wrong.
> >
> > BR Monk
> >
> > -----Original Message-----
> > From: Christian König [mailto:deathsimple@vodafone.de]
> > Sent: Monday, August 29, 2016 4:06 PM
> > To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> > Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
> >
> > Am 29.08.2016 um 04:55 schrieb Monk Liu:
> >> CE & DE can have at most 128 dw as the gap between them, so to sync
> >> CE and DE we don't need double SWITCH_BUFFERs any more, which is
> >> ugly and harms performance; we only need to insert 128 NOPs after the
> >> VM flush to prevent a CE VM fault.
> >>
> >> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
> >> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> > Looks good to me, but only the GFX engines have a CE. So syncing on the
> compute engines is pretty much pointless.
> >
> > So I suggest that you move this into the "usepfp" if branch as well.
> >
> > With that fixed the patch is Reviewed-by: Christian König
> <christian.koenig@amd.com>.
> >
> > Regards,
> > Christian.
> >
> >> ---
> >>    drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
> >>    1 file changed, 6 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> >> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> >> index 26fced0..ce1e616 100755
> >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> >> @@ -6005,14 +6005,6 @@ static void
> gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
> >>    	amdgpu_ring_write(ring, seq);
> >>    	amdgpu_ring_write(ring, 0xffffffff);
> >>    	amdgpu_ring_write(ring, 4); /* poll interval */
> >> -
> >> -	if (usepfp) {
> >> -		/* synce CE with ME to prevent CE fetch CEIB before context
> switch done */
> >> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> >> -		amdgpu_ring_write(ring, 0);
> >> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> >> -		amdgpu_ring_write(ring, 0);
> >> -	}
> >>    }
> >>
> >>    static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> @@
> >> -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
> >>    {
> >>    	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
> >>
> >> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE
> finish CEIB */
> >> +	amdgpu_ring_insert_nop(ring, 128);
> >> +
> >>    	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> >>    	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
> >>    				 WRITE_DATA_DST_SEL(0)) |
> >> @@ -6059,11 +6054,10 @@ static void
> gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> >>    		/* sync PFP to ME, otherwise we might get invalid PFP reads
> */
> >>    		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
> >>    		amdgpu_ring_write(ring, 0x0);
> >> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> >> -		amdgpu_ring_write(ring, 0);
> >> -		amdgpu_ring_write(ring,
> PACKET3(PACKET3_SWITCH_BUFFER, 0));
> >> -		amdgpu_ring_write(ring, 0);
> >>    	}
> >> +
> >> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush
> finish */
> >> +	amdgpu_ring_insert_nop(ring, 128);
> >>    }
> >>
> >>    static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]                     ` <MWHPR12MB16948C33AC82C6566DBA5C5EF7E10-Gy0DoCVfaSW4WA4dJ5YXGAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2016-09-05 19:13                       ` Marek Olšák
       [not found]                         ` <CAAxE2A6s0ephQBxb=b8XU01wNbQ3aCX51thv4DrLCs6VckLxzw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Marek Olšák @ 2016-09-05 19:13 UTC (permalink / raw)
  To: Deucher, Alexander
  Cc: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Liu, Monk

On Mon, Aug 29, 2016 at 3:37 PM, Deucher, Alexander
<Alexander.Deucher@amd.com> wrote:
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
>> Of Christian König
>> Sent: Monday, August 29, 2016 4:25 AM
>> To: Liu, Monk; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
>>
>> Hui what?
>>
>> It feels like a hundred times I asked if the compute engine has a CE as
>> well, but so far the answer was always No. That would explain a whole
>> bunch of problems we had with the compute rings as well.
>
> Can we get some clarification?  It doesn't have a dedicated CE as far as I know.  Can the CE be used in conjunction with the MEC as well as the PFP/ME?

From the CP program spec, CE can only be used with DE and DE = CP.ME.
Also, CE is described in the Kernel Graphics Queue section. There is
no mention of CE in the compute sections.

Thus, compute IBs can't be paired with CE IBs.

Marek
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
       [not found]                         ` <CAAxE2A6s0ephQBxb=b8XU01wNbQ3aCX51thv4DrLCs6VckLxzw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2016-09-06  2:58                           ` Liu, Monk
  0 siblings, 0 replies; 17+ messages in thread
From: Liu, Monk @ 2016-09-06  2:58 UTC (permalink / raw)
  To: Marek Olšák, Deucher, Alexander
  Cc: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Yeah, the CPC cannot use the constant engine.

BR Monk

-----Original Message-----
From: Marek Olšák [mailto:maraeo@gmail.com] 
Sent: Tuesday, September 06, 2016 3:13 AM
To: Deucher, Alexander <Alexander.Deucher@amd.com>
Cc: Christian König <deathsimple@vodafone.de>; Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de

On Mon, Aug 29, 2016 at 3:37 PM, Deucher, Alexander <Alexander.Deucher@amd.com> wrote:
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On 
>> Behalf Of Christian König
>> Sent: Monday, August 29, 2016 4:25 AM
>> To: Liu, Monk; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 2/4] drm/amdgpu:new method to sync ce&de
>>
>> Hui what?
>>
>> It feels like a hundred times I asked if the compute engine has a CE 
>> as well, but so far the answer was always No. That would explain a 
>> whole bunch of problems we had with the compute rings as well.
>
> Can we get some clarification?  It doesn't have a dedicated CE as far as I know.  Can the CE be used in conjunction with the MEC as well as the PFP/ME?

From the CP program spec, CE can only be used with DE and DE = CP.ME.
Also, CE is described in the Kernel Graphics Queue section. There is no mention of CE in the compute sections.

Thus, compute IBs can't be paired with CE IBs.

Marek
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL
       [not found] ` <1472460939-31571-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-29  8:55   ` Monk Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Monk Liu @ 2016-08-29  8:55 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Use the CONTEXT_CONTROL packet to dynamically skip the
preamble IB and other load_xxx commands in sequence.

Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  9 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1254410..0de5f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
 	void (*begin_use)(struct amdgpu_ring *ring);
 	void (*end_use)(struct amdgpu_ring *ring);
 	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 };
 
 /*
@@ -965,6 +966,7 @@ struct amdgpu_ctx {
 	spinlock_t		ring_lock;
 	struct fence            **fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
+	bool preamble_presented;
 };
 
 struct amdgpu_ctx_mgr {
@@ -1227,8 +1229,13 @@ struct amdgpu_cs_parser {
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
+	bool preamble_present; /* True means this command submit involves a preamble IB */
 };
 
+#define PREAMBLE_IB_PRESENT 		(1 << 0) /* bit set means command submit involves a preamble IB */
+#define PREAMBLE_IB_PRESENT_FIRST	(1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define HAVE_CTX_SWITCH		(1 << 2) /* bit set means context switch occured */
+
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
@@ -1237,6 +1244,7 @@ struct amdgpu_job {
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
 	struct fence		*fence; /* the hw fence */
+	uint32_t		preamble_status;
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
@@ -2264,6 +2272,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2d4e005..6d8c050 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -792,6 +792,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
+		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->preamble_present = true;
+
 		if (parser->job->ring && parser->job->ring != ring)
 			return -EINVAL;
 
@@ -930,6 +933,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		return r;
 	}
 
+	if (p->preamble_present) {
+		job->preamble_status |= PREAMBLE_IB_PRESENT;
+		if (!p->ctx->preamble_presented)
+			job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
+	}
+
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
 	p->fence = fence_get(&job->base.s_fence->finished);
@@ -940,6 +949,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
+	if (p->preamble_present)
+		p->ctx->preamble_presented = true;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 04263f0..b12b5ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
-	bool skip_preamble, need_ctx_switch;
+	bool need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
+	uint32_t status = 0;
 
 	unsigned i;
 	int r = 0;
@@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	/* always set cond_exec_polling to CONTINUE */
 	*ring->cond_exe_cpu_addr = 1;
 
-	skip_preamble = ring->current_ctx == fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
+	if (job && ring->funcs->emit_cntxcntl) {
+		if (need_ctx_switch)
+			status |= HAVE_CTX_SWITCH;
+		status |= job->preamble_status;
+		amdgpu_ring_emit_cntxcntl(ring, status);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
-
-		/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
-			continue;
-
 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
 				    need_ctx_switch);
 		need_ctx_switch = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fbc57a7..5cf53d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6085,6 +6085,36 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+	uint32_t dw2 = 0;
+
+	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+	if (flags & HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		dw2 |= 0x8001;
+		/* set load_cs_sh_regs */
+		dw2 |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs for GFX */
+		if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+			dw2 |= 0x10002;
+
+		/* set load_ce_ram if preamble presented */
+		if (PREAMBLE_IB_PRESENT & flags)
+			dw2 |= 0x10000000;
+	} else {
+		/* still load_ce_ram if this is the first time preamble presented
+		 * although there is no context switch happens.
+		 */
+		if (PREAMBLE_IB_PRESENT_FIRST & flags)
+			dw2 |= 0x10000000;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, dw2);
+	amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6267,6 +6297,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -6285,6 +6316,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };
 
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL
       [not found] ` <1472440438-9429-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-29  3:13   ` Monk Liu
  0 siblings, 0 replies; 17+ messages in thread
From: Monk Liu @ 2016-08-29  3:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Use the CONTEXT_CONTROL packet to dynamically skip the
preamble IB and other load_xxx commands in sequence.

Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  9 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9132719..a9dfeb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -339,6 +339,7 @@ struct amdgpu_ring_funcs {
 	void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
 	void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
 	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 };
 
 /*
@@ -1050,6 +1051,7 @@ struct amdgpu_ctx {
 	spinlock_t		ring_lock;
 	struct fence            **fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
+	bool preamble_presented;
 };
 
 struct amdgpu_ctx_mgr {
@@ -1320,8 +1322,13 @@ struct amdgpu_cs_parser {
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
+	bool preamble_present; /* True means this command submit involves a preamble IB */
 };
 
+#define PREAMBLE_IB_PRESENT 		(1 << 0) /* bit set means command submit involves a preamble IB */
+#define PREAMBLE_IB_PRESENT_FIRST	(1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define HAVE_CTX_SWITCH		(1 << 2) /* bit set means context switch occured */
+
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
@@ -1330,6 +1337,7 @@ struct amdgpu_job {
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
 	struct fence		*fence; /* the hw fence */
+	uint32_t		preamble_status;
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
@@ -2374,6 +2382,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
 #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 78d3831..f2d739a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -711,6 +711,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
+		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->preamble_present = true;
+
 		if (parser->job->ring && parser->job->ring != ring)
 			return -EINVAL;
 
@@ -849,6 +852,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		return r;
 	}
 
+	if (p->preamble_present) {
+		job->preamble_status |= PREAMBLE_IB_PRESENT;
+		if (!p->ctx->preamble_presented)
+			job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
+	}
+
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
 	p->fence = fence_get(&job->base.s_fence->finished);
@@ -859,6 +868,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
+	if (p->preamble_present)
+		p->ctx->preamble_presented = true;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index df379c7..7c501ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
-	bool skip_preamble, need_ctx_switch;
+	bool need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
+	uint32_t status = 0;
 
 	unsigned i;
 	int r = 0;
@@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	/* always set cond_exec_polling to CONTINUE */
 	*ring->cond_exe_cpu_addr = 1;
 
-	skip_preamble = ring->current_ctx == fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
+	if (job && ring->funcs->emit_cntxcntl) {
+		if (need_ctx_switch)
+			status |= HAVE_CTX_SWITCH;
+		status |= job->preamble_status;
+		amdgpu_ring_emit_cntxcntl(ring, status);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
-
-		/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
-			continue;
-
 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
 				    need_ctx_switch);
 		need_ctx_switch = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ce1e616..8f6d860 100755
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6164,6 +6164,36 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+	uint32_t dw2 = 0;
+
+	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+	if (flags & HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		dw2 |= 0x8001;
+		/* set load_cs_sh_regs */
+		dw2 |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs for GFX */
+		if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+			dw2 |= 0x10002;
+
+		/* set load_ce_ram if preamble presented */
+		if (PREAMBLE_IB_PRESENT & flags)
+			dw2 |= 0x10000000;
+	} else {
+		/* still load_ce_ram if this is the first time preamble presented
+		 * although there is no context switch happens.
+		 */
+		if (PREAMBLE_IB_PRESENT_FIRST & flags)
+			dw2 |= 0x10000000;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, dw2);
+	amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6472,6 +6502,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -6490,6 +6521,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2016-09-06  2:58 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-29  2:55 [PATCH 1/4] drm/amdgpu:add switch buffer to end of CS Monk Liu
     [not found] ` <1472439337-8002-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-29  2:55   ` [PATCH 2/4] drm/amdgpu:new method to sync ce&de Monk Liu
     [not found]     ` <1472439337-8002-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-29  8:05       ` Christian König
     [not found]         ` <283cbf94-05bf-4d30-c03f-731dd34ac653-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-29  8:14           ` Liu, Monk
     [not found]             ` <MWHPR12MB1182364E8ADFBB9B2AAFD47A84E10-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-08-29  8:25               ` Christian König
     [not found]                 ` <1c8cab37-70cb-3e17-b651-b3b6b2b5b8e0-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-29  8:28                   ` Liu, Monk
     [not found]                     ` <MWHPR12MB118285051FDBA2381204FE0384E10-Gy0DoCVfaSVhjnLHdLm0OQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-08-29  8:54                       ` Christian König
2016-08-29 13:37                   ` Deucher, Alexander
     [not found]                     ` <MWHPR12MB16948C33AC82C6566DBA5C5EF7E10-Gy0DoCVfaSW4WA4dJ5YXGAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-09-05 19:13                       ` Marek Olšák
     [not found]                         ` <CAAxE2A6s0ephQBxb=b8XU01wNbQ3aCX51thv4DrLCs6VckLxzw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-09-06  2:58                           ` Liu, Monk
2016-08-29 13:21       ` Deucher, Alexander
2016-08-29  2:55   ` [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL Monk Liu
     [not found]     ` <1472439337-8002-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-29  8:10       ` Christian König
     [not found]         ` <106871c7-6405-99c9-4f42-beb0be317b2d-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-29  8:19           ` Liu, Monk
2016-08-29  8:04   ` [PATCH 1/4] drm/amdgpu:add switch buffer to end of CS Christian König
2016-08-29  3:13 [PATCH 0/4] fix GFX8 dma frame scheme Monk Liu
     [not found] ` <1472440438-9429-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-29  3:13   ` [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL Monk Liu
2016-08-29  8:55 [PATCH 0/4] fix GFX8 dmaframe scheme (v2) Monk Liu
     [not found] ` <1472460939-31571-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-08-29  8:55   ` [PATCH 4/4] drm/amdgpu:implement CONTEXT_CONTROL Monk Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.