* [PATCH 1/5] drm/amdgpu: add VMHUB to ring association
@ 2017-04-05 16:21 Christian König
       [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Add the info about which ring belongs to which VMHUB.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
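As a rough sketch (not part of the patch itself), the idea is that common
code can pick the one hub a ring actually runs on instead of iterating over
all hubs, assuming the per-device vmhub[] array indexed by
AMDGPU_GFXHUB/AMDGPU_MMHUB:

static struct amdgpu_vmhub *example_ring_to_vmhub(struct amdgpu_ring *ring)
{
        /* each ring type declares its hub once in its amdgpu_ring_funcs */
        unsigned vmhub = ring->funcs->vmhub;

        return &ring->adev->vmhub[vmhub];
}

The following patches use this association to track, flush and invalidate
only the hub a ring belongs to.
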
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 1 +
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c    | 2 ++
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c    | 1 +
 5 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7479e47..45bb87b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -99,6 +99,7 @@ struct amdgpu_ring_funcs {
 	uint32_t		align_mask;
 	u32			nop;
 	bool			support_64bit_ptrs;
+	unsigned		vmhub;
 
 	/* ring read/write ptr handling */
 	u64 (*get_rptr)(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a967879..1cc006a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3673,6 +3673,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
@@ -3717,6 +3718,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
@@ -3746,6 +3748,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_GFXHUB,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d40eb31..8cbb49d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1473,6 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 	.align_mask = 0xf,
 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
 	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = sdma_v4_0_ring_get_rptr,
 	.get_wptr = sdma_v4_0_ring_get_wptr,
 	.set_wptr = sdma_v4_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 819148a..fa80465 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1448,6 +1448,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.align_mask = 0xf,
 	.nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_ring_get_rptr,
 	.get_wptr = uvd_v7_0_ring_get_wptr,
 	.set_wptr = uvd_v7_0_ring_set_wptr,
@@ -1475,6 +1476,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = HEVC_ENC_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_enc_ring_get_rptr,
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 8dde83f..6374133 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1074,6 +1074,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vce_v4_0_ring_get_rptr,
 	.get_wptr = vce_v4_0_ring_get_wptr,
 	.set_wptr = vce_v4_0_ring_set_wptr,
-- 
2.5.0

* [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub
       [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-04-05 16:21   ` Christian König
       [not found]     ` <1491409320-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-04-05 16:21   ` [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB Christian König
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Rather inefficient, but this way we only need to flush the current hub.

I wonder if we shouldn't go all the way and separate the VMID ranges completely.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
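For reference, the per-hub reuse check in amdgpu_vm_grab_id() now boils down
to roughly the following (a simplified sketch, not the literal code; vmhub
comes from ring->funcs->vmhub of the ring the job is submitted on):

static bool example_can_reuse_vmid(struct amdgpu_vm_id *id, unsigned vmhub,
                                   uint64_t pd_addr, uint64_t fence_context,
                                   struct fence *updates)
{
        struct fence *flushed = id->flushed_updates[vmhub];

        /* the VMID must already point at our page directory on this hub */
        if (id->pd_gpu_addr[vmhub] != pd_addr)
                return false;

        /* the last flush on this hub must be known and completed */
        if (!id->last_flush[vmhub])
                return false;
        if (id->last_flush[vmhub]->context != fence_context &&
            !fence_is_signaled(id->last_flush[vmhub]))
                return false;

        /* no page table updates newer than the last flushed ones */
        if (updates && (!flushed || fence_is_later(updates, flushed)))
                return false;

        return true;
}

Each hub keeps its own last_flush/pd_gpu_addr/flushed_updates entry, so
reusing a VMID on one hub no longer depends on the state of the other hub.
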
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 ++++++++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++---
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8785420..6fd1952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -406,6 +406,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
+	unsigned vmhub = ring->funcs->vmhub;
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	struct fence *updates = sync->last_vm_update;
 	struct amdgpu_vm_id *id, *idle;
@@ -480,17 +481,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (atomic64_read(&id->owner) != vm->client_id)
 			continue;
 
-		if (job->vm_pd_addr != id->pd_gpu_addr)
+		if (job->vm_pd_addr != id->pd_gpu_addr[vmhub])
 			continue;
 
-		if (!id->last_flush)
+		if (!id->last_flush[vmhub])
 			continue;
 
-		if (id->last_flush->context != fence_context &&
-		    !fence_is_signaled(id->last_flush))
+		if (id->last_flush[vmhub]->context != fence_context &&
+		    !fence_is_signaled(id->last_flush[vmhub]))
 			continue;
 
-		flushed  = id->flushed_updates;
+		flushed  = id->flushed_updates[vmhub];
 		if (updates &&
 		    (!flushed || fence_is_later(updates, flushed)))
 			continue;
@@ -522,13 +523,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r)
 		goto error;
 
-	fence_put(id->last_flush);
-	id->last_flush = NULL;
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+		fence_put(id->last_flush[i]);
+		id->last_flush[i] = NULL;
+	}
 
-	fence_put(id->flushed_updates);
-	id->flushed_updates = fence_get(updates);
+	fence_put(id->flushed_updates[vmhub]);
+	id->flushed_updates[vmhub] = fence_get(updates);
 
-	id->pd_gpu_addr = job->vm_pd_addr;
+	id->pd_gpu_addr[vmhub] = job->vm_pd_addr;
 	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 	atomic64_set(&id->owner, vm->client_id);
@@ -591,6 +594,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
+	unsigned vmhub = ring->funcs->vmhub;
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
 		id->gds_base != job->gds_base ||
 		id->gds_size != job->gds_size ||
@@ -629,8 +633,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 			return r;
 
 		mutex_lock(&adev->vm_manager.lock);
-		fence_put(id->last_flush);
-		id->last_flush = fence;
+		fence_put(id->last_flush[vmhub]);
+		id->last_flush[vmhub] = fence;
 		mutex_unlock(&adev->vm_manager.lock);
 	}
 
@@ -2234,13 +2238,15 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-	unsigned i;
+	unsigned i, j;
 
 	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
 		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
 
 		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
-		fence_put(id->flushed_updates);
-		fence_put(id->last_flush);
+		for (j = 0; j < AMDGPU_MAX_VMHUBS; ++j) {
+			fence_put(id->flushed_updates[j]);
+			fence_put(id->last_flush[j]);
+		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 7d01372..d61dd83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -132,12 +132,12 @@ struct amdgpu_vm {
 struct amdgpu_vm_id {
 	struct list_head	list;
 	struct amdgpu_sync	active;
-	struct fence		*last_flush;
+	struct fence		*last_flush[AMDGPU_MAX_VMHUBS];
 	atomic64_t		owner;
 
-	uint64_t		pd_gpu_addr;
+	uint64_t		pd_gpu_addr[AMDGPU_MAX_VMHUBS];
 	/* last flushed PD/PT update */
-	struct fence		*flushed_updates;
+	struct fence		*flushed_updates[AMDGPU_MAX_VMHUBS];
 
 	uint32_t                current_gpu_reset_count;
 
-- 
2.5.0

* [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB
       [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-04-05 16:21   ` [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub Christian König
@ 2017-04-05 16:21   ` Christian König
       [not found]     ` <1491409320-2448-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-04-05 16:21   ` [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually Christian König
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Drop invalidating both hubs from each engine.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  36 +++++------
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  60 +++++++++---------
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 111 +++++++++++++++------------------
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  57 ++++++++---------
 4 files changed, 118 insertions(+), 146 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1cc006a..dce2950 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3147,35 +3147,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB];
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->ctx0_ptb_addr_lo32
-					   + (2 * vm_id),
-					   lower_32_bits(pd_addr));
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
+				   lower_32_bits(pd_addr));
 
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->ctx0_ptb_addr_hi32
-					   + (2 * vm_id),
-					   upper_32_bits(pd_addr));
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
+				   upper_32_bits(pd_addr));
 
-		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
-					   hub->vm_inv_eng0_req + eng, req);
+	gfx_v9_0_write_data_to_reg(ring, usepfp, true,
+				   hub->vm_inv_eng0_req + eng, req);
 
-		/* wait for the invalidate to complete */
-		gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
-				      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
-	}
+	/* wait for the invalidate to complete */
+	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
+			      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
 
 	/* compute doesn't have PFP */
 	if (usepfp) {
@@ -3680,7 +3674,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
-		46 + /* VM_FLUSH */
+		24 + /* VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
 		4 + /* double SWITCH_BUFFER,
@@ -3727,7 +3721,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		64 + /* gfx_v9_0_ring_emit_vm_flush */
+		24 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -3757,7 +3751,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		64 + /* gfx_v9_0_ring_emit_vm_flush */
+		24 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 8cbb49d..06826a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1039,44 +1039,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					 unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
-				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
-		amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-				  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-				  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, 1 << vm_id); /* reference */
-		amdgpu_ring_write(ring, 1 << vm_id); /* mask */
-		amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-				  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
-	}
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 1 << vm_id); /* reference */
+	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
 static int sdma_v4_0_early_init(void *handle)
@@ -1481,7 +1477,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 		6 + /* sdma_v4_0_ring_emit_hdp_flush */
 		3 + /* sdma_v4_0_ring_emit_hdp_invalidate */
 		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
-		36 + /* sdma_v4_0_ring_emit_vm_flush */
+		18 + /* sdma_v4_0_ring_emit_vm_flush */
 		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
 	.emit_ib = sdma_v4_0_ring_emit_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index fa80465..772c0f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1034,42 +1034,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
-		data1 = upper_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		mask = 0xffffffff;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-
-		/* flush TLB */
-		data0 = (hub->vm_inv_eng0_req + eng) << 2;
-		data1 = req;
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		/* wait for flush */
-		data0 = (hub->vm_inv_eng0_ack + eng) << 2;
-		data1 = 1 << vm_id;
-		mask =  1 << vm_id;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-	}
+	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
+	data1 = upper_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
+
+	/* flush TLB */
+	data0 = (hub->vm_inv_eng0_req + eng) << 2;
+	data1 = req;
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
+
+	/* wait for flush */
+	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
+	data1 = 1 << vm_id;
+	mask =  1 << vm_id;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
 }
 
 static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1080,44 +1076,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, 0xffffffff);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 1 << vm_id);
-		amdgpu_ring_write(ring, 1 << vm_id);
-	}
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
 }
 
 #if 0
@@ -1455,7 +1444,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.emit_frame_size =
 		2 + /* uvd_v7_0_ring_emit_hdp_flush */
 		2 + /* uvd_v7_0_ring_emit_hdp_invalidate */
-		34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */
+		34 + /* uvd_v7_0_ring_emit_vm_flush */
 		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
 	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
 	.emit_ib = uvd_v7_0_ring_emit_ib,
@@ -1481,7 +1470,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
 	.emit_frame_size =
-		17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */
+		17 + /* uvd_v7_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
 		1, /* uvd_v7_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 6374133..5e4f243 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -973,44 +973,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, 0xffffffff);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
-		/* flush TLB */
-		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 1 << vm_id);
-		amdgpu_ring_write(ring, 1 << vm_id);
-	}
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
 }
 
 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1080,7 +1073,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.set_wptr = vce_v4_0_ring_set_wptr,
 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
 	.emit_frame_size =
-		17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
+		17 + /* vce_v4_0_emit_vm_flush */
 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
 		1, /* vce_v4_0_ring_insert_end */
 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
-- 
2.5.0

* [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually
       [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-04-05 16:21   ` [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub Christian König
  2017-04-05 16:21   ` [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB Christian König
@ 2017-04-05 16:21   ` Christian König
       [not found]     ` <1491409320-2448-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-04-05 16:22   ` [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes Christian König
  2017-04-05 18:48   ` [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Alex Deucher
  4 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

For Vega10 we have 18 VM invalidation engines for each VMHUB.

Start to assign them manually to the rings.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
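The assignment itself is just a per-hub counter. As a sketch of the idea
(matching the gmc_v9_0_late_init() hunk below, minus the debug print):

static void example_assign_vm_inv_engines(struct amdgpu_device *adev)
{
        unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 0 };
        unsigned i;

        for (i = 0; i < adev->num_rings; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];

                /* next free invalidation engine on the hub this ring uses */
                ring->vm_inv_eng = vm_inv_eng[ring->funcs->vmhub]++;
        }
}

The emit_vm_flush() implementations then address hub->vm_inv_eng0_req/_ack
with ring->vm_inv_eng instead of ring->idx.
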
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c    |  4 ++--
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c    |  2 +-
 6 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 45bb87b..5786cc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -179,6 +179,7 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
+	unsigned		vm_inv_eng;
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dce2950..79bfbbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3150,7 +3150,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB];
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e1637d5..4f6000b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -386,6 +386,18 @@ static int gmc_v9_0_early_init(void *handle)
 static int gmc_v9_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 0 };
+	unsigned i;
+
+	for(i = 0; i < adev->num_rings; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		unsigned vmhub = ring->funcs->vmhub;
+
+		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
+		dev_info(adev->dev, "ring %u uses VM inv eng %u on hub %u\n",
+			 ring->idx, ring->vm_inv_eng, ring->funcs->vmhub);
+	}
+
 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 06826a0..90440e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1041,7 +1041,7 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 772c0f2..cc4f8f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1037,7 +1037,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	uint32_t data0, data1, mask;
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
@@ -1078,7 +1078,7 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 5e4f243..66474e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -975,7 +975,7 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-	unsigned eng = ring->idx;
+	unsigned eng = ring->vm_inv_eng;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
-- 
2.5.0

* [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes
       [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-04-05 16:21   ` [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually Christian König
@ 2017-04-05 16:22   ` Christian König
       [not found]     ` <1491409320-2448-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-04-05 18:48   ` [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Alex Deucher
  4 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2017-04-05 16:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Christian König <christian.koenig@amd.com>

Enable concurrent VM flushes for Vega10.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
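The interesting part is the VMID reuse path in amdgpu_vm_grab_id(): instead
of skipping a VMID that would need a flush, we remember that a flush is
needed and only fall back to the old behaviour on pre-Vega10 parts,
presumably safe now that each ring has its own invalidation engine (patch 4).
A sketch of the per-candidate check inside the loop:

        bool needs_flush = false;

        /* flush if the last flush on this hub is unknown or still pending */
        if (!id->last_flush[vmhub] ||
            (id->last_flush[vmhub]->context != fence_context &&
             !fence_is_signaled(id->last_flush[vmhub])))
                needs_flush = true;

        /* flush if newer page table updates landed since the last flush */
        if (updates && (!flushed || fence_is_later(updates, flushed)))
                needs_flush = true;

        /* concurrent flushes are only possible starting with Vega10 */
        if (adev->asic_type < CHIP_VEGA10 && needs_flush)
                continue;       /* keep looking for an idle VMID */

        job->vm_needs_flush = needs_flush;
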
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 51 +++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6fd1952..1bb2f8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -462,11 +462,12 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	}
 	kfree(fences);
 
-	job->vm_needs_flush = true;
+	job->vm_needs_flush = false;
 	/* Check if we can use a VMID already assigned to this VM */
 	i = ring->idx;
 	do {
 		struct fence *flushed;
+		bool needs_flush = false;
 
 		id = vm->ids[i++];
 		if (i == AMDGPU_MAX_RINGS)
@@ -484,16 +485,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (job->vm_pd_addr != id->pd_gpu_addr[vmhub])
 			continue;
 
-		if (!id->last_flush[vmhub])
-			continue;
-
-		if (id->last_flush[vmhub]->context != fence_context &&
-		    !fence_is_signaled(id->last_flush[vmhub]))
-			continue;
+		if (!id->last_flush[vmhub] ||
+		    (id->last_flush[vmhub]->context != fence_context &&
+		     !fence_is_signaled(id->last_flush[vmhub])))
+			needs_flush = true;
 
 		flushed  = id->flushed_updates[vmhub];
-		if (updates &&
-		    (!flushed || fence_is_later(updates, flushed)))
+		if (updates && (!flushed || fence_is_later(updates, flushed)))
+			needs_flush = true;
+
+		/* Concurrent flushes are only possible starting with Vega10 */
+		if (adev->asic_type < CHIP_VEGA10 && needs_flush)
 			continue;
 
 		/* Good we can use this VMID. Remember this submission as
@@ -503,15 +505,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-		vm->ids[ring->idx] = id;
-
-		job->vm_id = id - adev->vm_manager.ids;
-		job->vm_needs_flush = false;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, job);
+		if (updates && (!flushed || fence_is_later(updates, flushed))) {
+			fence_put(id->flushed_updates[vmhub]);
+			id->flushed_updates[vmhub] = fence_get(updates);
+		}
 
-		mutex_unlock(&adev->vm_manager.lock);
-		return 0;
+		if (needs_flush)
+			goto needs_flush;
+		else
+			goto no_flush_needed;
 
 	} while (i != ring->idx);
 
@@ -523,18 +525,21 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r)
 		goto error;
 
+	id->pd_gpu_addr[vmhub] = job->vm_pd_addr;
+	fence_put(id->flushed_updates[vmhub]);
+	id->flushed_updates[vmhub] = fence_get(updates);
+	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+	atomic64_set(&id->owner, vm->client_id);
+
+needs_flush:
+	job->vm_needs_flush = true;
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		fence_put(id->last_flush[i]);
 		id->last_flush[i] = NULL;
 	}
 
-	fence_put(id->flushed_updates[vmhub]);
-	id->flushed_updates[vmhub] = fence_get(updates);
-
-	id->pd_gpu_addr[vmhub] = job->vm_pd_addr;
-	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+no_flush_needed:
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-	atomic64_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;
 
 	job->vm_id = id - adev->vm_manager.ids;
-- 
2.5.0

* Re: [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB
       [not found]     ` <1491409320-2448-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-04-05 18:42       ` Alex Deucher
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Deucher @ 2017-04-05 18:42 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Apr 5, 2017 at 12:21 PM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Drop invalidating both hubs from each engine.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  |  36 +++++------
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  60 +++++++++---------
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  | 111 +++++++++++++++------------------
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  57 ++++++++---------
>  4 files changed, 118 insertions(+), 146 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1cc006a..dce2950 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3147,35 +3147,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>                                         unsigned vm_id, uint64_t pd_addr)
>  {
> +       struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB];

Should we use ring->vmhub here rather than hardcoding
AMDGPU_GFXHUB/AMDGPU_MMHUB? Same question for all the other IP blocks below.
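
I.e. presumably something like this at the top of each emit_vm_flush(), with
the hub index coming from the field that patch 1 added to the ring funcs:

        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
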
With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>



>         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>         unsigned eng = ring->idx;
> -       unsigned i;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
>         BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> -       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> -               struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> -               gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> -                                          hub->ctx0_ptb_addr_lo32
> -                                          + (2 * vm_id),
> -                                          lower_32_bits(pd_addr));
> +       gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> +                                  hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
> +                                  lower_32_bits(pd_addr));
>
> -               gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> -                                          hub->ctx0_ptb_addr_hi32
> -                                          + (2 * vm_id),
> -                                          upper_32_bits(pd_addr));
> +       gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> +                                  hub->ctx0_ptb_addr_hi32 + (2 * vm_id),
> +                                  upper_32_bits(pd_addr));
>
> -               gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> -                                          hub->vm_inv_eng0_req + eng, req);
> +       gfx_v9_0_write_data_to_reg(ring, usepfp, true,
> +                                  hub->vm_inv_eng0_req + eng, req);
>
> -               /* wait for the invalidate to complete */
> -               gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
> -                                     eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
> -       }
> +       /* wait for the invalidate to complete */
> +       gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
> +                             eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
>
>         /* compute doesn't have PFP */
>         if (usepfp) {
> @@ -3680,7 +3674,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>         .emit_frame_size = /* totally 242 maximum if 16 IBs */
>                 5 +  /* COND_EXEC */
>                 7 +  /* PIPELINE_SYNC */
> -               46 + /* VM_FLUSH */
> +               24 + /* VM_FLUSH */
>                 8 +  /* FENCE for VM_FLUSH */
>                 20 + /* GDS switch */
>                 4 + /* double SWITCH_BUFFER,
> @@ -3727,7 +3721,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
>                 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
>                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -               64 + /* gfx_v9_0_ring_emit_vm_flush */
> +               24 + /* gfx_v9_0_ring_emit_vm_flush */
>                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
>         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
>         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
> @@ -3757,7 +3751,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
>                 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
>                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -               64 + /* gfx_v9_0_ring_emit_vm_flush */
> +               24 + /* gfx_v9_0_ring_emit_vm_flush */
>                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
>         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
>         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 8cbb49d..06826a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1039,44 +1039,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>                                          unsigned vm_id, uint64_t pd_addr)
>  {
> +       struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>         unsigned eng = ring->idx;
> -       unsigned i;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
>         BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> -       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> -               struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> -               amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> -                                 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> -               amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
> -               amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> -               amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> -                                 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> -               amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
> -               amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> -
> -               /* flush TLB */
> -               amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> -                                 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> -               amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
> -               amdgpu_ring_write(ring, req);
> -
> -               /* wait for flush */
> -               amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
> -                                 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
> -                                 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
> -               amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> -               amdgpu_ring_write(ring, 0);
> -               amdgpu_ring_write(ring, 1 << vm_id); /* reference */
> -               amdgpu_ring_write(ring, 1 << vm_id); /* mask */
> -               amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
> -                                 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
> -       }
> +       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> +                         SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> +       amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2);
> +       amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> +       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> +                         SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> +       amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2);
> +       amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> +
> +       /* flush TLB */
> +       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
> +                         SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
> +       amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
> +       amdgpu_ring_write(ring, req);
> +
> +       /* wait for flush */
> +       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
> +                         SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
> +                         SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
> +       amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> +       amdgpu_ring_write(ring, 0);
> +       amdgpu_ring_write(ring, 1 << vm_id); /* reference */
> +       amdgpu_ring_write(ring, 1 << vm_id); /* mask */
> +       amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
> +                         SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>  }
>
>  static int sdma_v4_0_early_init(void *handle)
> @@ -1481,7 +1477,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
>                 6 + /* sdma_v4_0_ring_emit_hdp_flush */
>                 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */
>                 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
> -               36 + /* sdma_v4_0_ring_emit_vm_flush */
> +               18 + /* sdma_v4_0_ring_emit_vm_flush */
>                 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
>         .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
>         .emit_ib = sdma_v4_0_ring_emit_ib,
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index fa80465..772c0f2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -1034,42 +1034,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
>  static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>                                         unsigned vm_id, uint64_t pd_addr)
>  {
> +       struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>         uint32_t data0, data1, mask;
>         unsigned eng = ring->idx;
> -       unsigned i;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
>         BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> -       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> -               struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> -               data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
> -               data1 = upper_32_bits(pd_addr);
> -               uvd_v7_0_vm_reg_write(ring, data0, data1);
> -
> -               data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> -               data1 = lower_32_bits(pd_addr);
> -               uvd_v7_0_vm_reg_write(ring, data0, data1);
> -
> -               data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> -               data1 = lower_32_bits(pd_addr);
> -               mask = 0xffffffff;
> -               uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> -
> -               /* flush TLB */
> -               data0 = (hub->vm_inv_eng0_req + eng) << 2;
> -               data1 = req;
> -               uvd_v7_0_vm_reg_write(ring, data0, data1);
> -
> -               /* wait for flush */
> -               data0 = (hub->vm_inv_eng0_ack + eng) << 2;
> -               data1 = 1 << vm_id;
> -               mask =  1 << vm_id;
> -               uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> -       }
> +       data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
> +       data1 = upper_32_bits(pd_addr);
> +       uvd_v7_0_vm_reg_write(ring, data0, data1);
> +
> +       data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> +       data1 = lower_32_bits(pd_addr);
> +       uvd_v7_0_vm_reg_write(ring, data0, data1);
> +
> +       data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
> +       data1 = lower_32_bits(pd_addr);
> +       mask = 0xffffffff;
> +       uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
> +
> +       /* flush TLB */
> +       data0 = (hub->vm_inv_eng0_req + eng) << 2;
> +       data1 = req;
> +       uvd_v7_0_vm_reg_write(ring, data0, data1);
> +
> +       /* wait for flush */
> +       data0 = (hub->vm_inv_eng0_ack + eng) << 2;
> +       data1 = 1 << vm_id;
> +       mask =  1 << vm_id;
> +       uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
>  }
>
>  static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
> @@ -1080,44 +1076,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
>  static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
>                          unsigned int vm_id, uint64_t pd_addr)
>  {
> +       struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>         unsigned eng = ring->idx;
> -       unsigned i;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
>         BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> -       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> -               struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> -               amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> -               amdgpu_ring_write(ring,
> -                       (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> -               amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> -
> -               amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> -               amdgpu_ring_write(ring,
> -                       (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> -               amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> -               amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> -               amdgpu_ring_write(ring,
> -                       (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> -               amdgpu_ring_write(ring, 0xffffffff);
> -               amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> -               /* flush TLB */
> -               amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> -               amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> -               amdgpu_ring_write(ring, req);
> -
> -               /* wait for flush */
> -               amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> -               amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> -               amdgpu_ring_write(ring, 1 << vm_id);
> -               amdgpu_ring_write(ring, 1 << vm_id);
> -       }
> +       amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> +       amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> +       amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> +
> +       amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> +       amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> +       amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> +       amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> +       amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> +       amdgpu_ring_write(ring, 0xffffffff);
> +       amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> +       /* flush TLB */
> +       amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
> +       amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> +       amdgpu_ring_write(ring, req);
> +
> +       /* wait for flush */
> +       amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
> +       amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> +       amdgpu_ring_write(ring, 1 << vm_id);
> +       amdgpu_ring_write(ring, 1 << vm_id);
>  }
>
>  #if 0
> @@ -1455,7 +1444,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
>         .emit_frame_size =
>                 2 + /* uvd_v7_0_ring_emit_hdp_flush */
>                 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */
> -               34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */
> +               34 + /* uvd_v7_0_ring_emit_vm_flush */
>                 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
>         .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
>         .emit_ib = uvd_v7_0_ring_emit_ib,
> @@ -1481,7 +1470,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
>         .get_wptr = uvd_v7_0_enc_ring_get_wptr,
>         .set_wptr = uvd_v7_0_enc_ring_set_wptr,
>         .emit_frame_size =
> -               17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */
> +               17 + /* uvd_v7_0_enc_ring_emit_vm_flush */
>                 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
>                 1, /* uvd_v7_0_enc_ring_insert_end */
>         .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 6374133..5e4f243 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -973,44 +973,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
>  static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
>                          unsigned int vm_id, uint64_t pd_addr)
>  {
> +       struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>         unsigned eng = ring->idx;
> -       unsigned i;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
>         BUG_ON(pd_addr & 0xFFFF00000000003EULL);
>
> -       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
> -               struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
> -
> -               amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> -               amdgpu_ring_write(ring,
> -                       (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> -               amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> -
> -               amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> -               amdgpu_ring_write(ring,
> -                       (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> -               amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> -               amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> -               amdgpu_ring_write(ring,
> -                       (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> -               amdgpu_ring_write(ring, 0xffffffff);
> -               amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> -
> -               /* flush TLB */
> -               amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> -               amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> -               amdgpu_ring_write(ring, req);
> -
> -               /* wait for flush */
> -               amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> -               amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> -               amdgpu_ring_write(ring, 1 << vm_id);
> -               amdgpu_ring_write(ring, 1 << vm_id);
> -       }
> +       amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> +       amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
> +       amdgpu_ring_write(ring, upper_32_bits(pd_addr));
> +
> +       amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> +       amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> +       amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> +       amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> +       amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
> +       amdgpu_ring_write(ring, 0xffffffff);
> +       amdgpu_ring_write(ring, lower_32_bits(pd_addr));
> +
> +       /* flush TLB */
> +       amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
> +       amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
> +       amdgpu_ring_write(ring, req);
> +
> +       /* wait for flush */
> +       amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
> +       amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
> +       amdgpu_ring_write(ring, 1 << vm_id);
> +       amdgpu_ring_write(ring, 1 << vm_id);
>  }
>
>  static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
> @@ -1080,7 +1073,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
>         .set_wptr = vce_v4_0_ring_set_wptr,
>         .parse_cs = amdgpu_vce_ring_parse_cs_vm,
>         .emit_frame_size =
> -               17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
> +               17 + /* vce_v4_0_emit_vm_flush */
>                 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
>                 1, /* vce_v4_0_ring_insert_end */
>         .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
> --
> 2.5.0
>
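
The sequence emitted above has the same shape on every engine once the hub is fixed: write the two halves of the page-directory address, wait for the low half to land, kick the invalidation request and wait for the per-VMID ack bit. A compact sketch of that shape follows; emit_wreg()/emit_reg_wait() are stand-ins for the engine-specific REG_WRITE/REG_WAIT packets, not real helpers.

/* Sketch of the single-hub flush shape used by the emit_vm_flush()
 * variants in this patch; emit_wreg()/emit_reg_wait() are placeholders
 * for the per-engine packet formats, not existing functions. */
static void emit_vm_flush_sketch(struct amdgpu_ring *ring,
				 struct amdgpu_vmhub *hub,
				 unsigned vm_id, uint64_t pd_addr,
				 uint32_t req, unsigned eng)
{
	emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2,
		  upper_32_bits(pd_addr));
	emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2,
		  lower_32_bits(pd_addr));
	/* make sure the new PD address is visible before flushing */
	emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2,
		      lower_32_bits(pd_addr), 0xffffffff);
	/* flush TLB */
	emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
	/* wait for flush */
	emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
		      1 << vm_id, 1 << vm_id);
}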

* Re: [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub
       [not found]     ` <1491409320-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-04-05 18:47       ` Alex Deucher
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Deucher @ 2017-04-05 18:47 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Apr 5, 2017 at 12:21 PM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Rather inefficient, but this way we only need to flush the current hub.
>
> I wonder if we shouldn't do this properly and separate the VMID ranges completely.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 ++++++++++++++++++++--------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++---
>  2 files changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 8785420..6fd1952 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -406,6 +406,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>                       struct amdgpu_job *job)
>  {
>         struct amdgpu_device *adev = ring->adev;
> +       unsigned vmhub = ring->funcs->vmhub;
>         uint64_t fence_context = adev->fence_context + ring->idx;
>         struct fence *updates = sync->last_vm_update;
>         struct amdgpu_vm_id *id, *idle;
> @@ -480,17 +481,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>                 if (atomic64_read(&id->owner) != vm->client_id)
>                         continue;
>
> -               if (job->vm_pd_addr != id->pd_gpu_addr)
> +               if (job->vm_pd_addr != id->pd_gpu_addr[vmhub])
>                         continue;
>
> -               if (!id->last_flush)
> +               if (!id->last_flush[vmhub])
>                         continue;
>
> -               if (id->last_flush->context != fence_context &&
> -                   !fence_is_signaled(id->last_flush))
> +               if (id->last_flush[vmhub]->context != fence_context &&
> +                   !fence_is_signaled(id->last_flush[vmhub]))
>                         continue;
>
> -               flushed  = id->flushed_updates;
> +               flushed  = id->flushed_updates[vmhub];
>                 if (updates &&
>                     (!flushed || fence_is_later(updates, flushed)))
>                         continue;
> @@ -522,13 +523,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>         if (r)
>                 goto error;
>
> -       fence_put(id->last_flush);
> -       id->last_flush = NULL;
> +       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {

Would it be worth storing the number of vm_hubs per chip and using
that as the limit?  That way we wouldn't loop multiple times for older
ASICs with only one hub.

Alex
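
A minimal sketch of that suggestion, for comparison with the hunk that continues below; num_vmhubs is an assumed per-device count, nothing like it exists in this series:

/* Hypothetical sketch only: bound the loop by the number of hubs this
 * ASIC actually has; num_vmhubs is an invented field, not in the series. */
unsigned i;

for (i = 0; i < adev->vm_manager.num_vmhubs; ++i) {
	fence_put(id->last_flush[i]);
	id->last_flush[i] = NULL;
}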


> +               fence_put(id->last_flush[i]);
> +               id->last_flush[i] = NULL;
> +       }
>
> -       fence_put(id->flushed_updates);
> -       id->flushed_updates = fence_get(updates);
> +       fence_put(id->flushed_updates[vmhub]);
> +       id->flushed_updates[vmhub] = fence_get(updates);
>
> -       id->pd_gpu_addr = job->vm_pd_addr;
> +       id->pd_gpu_addr[vmhub] = job->vm_pd_addr;
>         id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
>         list_move_tail(&id->list, &adev->vm_manager.ids_lru);
>         atomic64_set(&id->owner, vm->client_id);
> @@ -591,6 +594,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>  {
>         struct amdgpu_device *adev = ring->adev;
>         struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
> +       unsigned vmhub = ring->funcs->vmhub;
>         bool gds_switch_needed = ring->funcs->emit_gds_switch && (
>                 id->gds_base != job->gds_base ||
>                 id->gds_size != job->gds_size ||
> @@ -629,8 +633,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>                         return r;
>
>                 mutex_lock(&adev->vm_manager.lock);
> -               fence_put(id->last_flush);
> -               id->last_flush = fence;
> +               fence_put(id->last_flush[vmhub]);
> +               id->last_flush[vmhub] = fence;
>                 mutex_unlock(&adev->vm_manager.lock);
>         }
>
> @@ -2234,13 +2238,15 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>   */
>  void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>  {
> -       unsigned i;
> +       unsigned i, j;
>
>         for (i = 0; i < AMDGPU_NUM_VM; ++i) {
>                 struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
>
>                 amdgpu_sync_free(&adev->vm_manager.ids[i].active);
> -               fence_put(id->flushed_updates);
> -               fence_put(id->last_flush);
> +               for (j = 0; j < AMDGPU_MAX_VMHUBS; ++j) {
> +                       fence_put(id->flushed_updates[j]);
> +                       fence_put(id->last_flush[j]);
> +               }
>         }
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 7d01372..d61dd83 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -132,12 +132,12 @@ struct amdgpu_vm {
>  struct amdgpu_vm_id {
>         struct list_head        list;
>         struct amdgpu_sync      active;
> -       struct fence            *last_flush;
> +       struct fence            *last_flush[AMDGPU_MAX_VMHUBS];
>         atomic64_t              owner;
>
> -       uint64_t                pd_gpu_addr;
> +       uint64_t                pd_gpu_addr[AMDGPU_MAX_VMHUBS];
>         /* last flushed PD/PT update */
> -       struct fence            *flushed_updates;
> +       struct fence            *flushed_updates[AMDGPU_MAX_VMHUBS];
>
>         uint32_t                current_gpu_reset_count;
>
> --
> 2.5.0
>
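
The commit message wonders about separating the VMID ranges completely. A purely hypothetical sketch of what a per-hub ID pool could look like is below; none of this is in the series, and the structure is invented for illustration only:

/* Hypothetical sketch: give each hub its own independent VMID pool so
 * flushes and ID reuse on GFXHUB and MMHUB never interact. */
struct amdgpu_vm_id_manager {
	struct mutex		lock;
	unsigned		num_ids;
	struct list_head	ids_lru;
	struct amdgpu_vm_id	ids[AMDGPU_NUM_VM];
};

/* in struct amdgpu_vm_manager: one pool per hub instead of a shared ids[] */
struct amdgpu_vm_id_manager	id_mgr[AMDGPU_MAX_VMHUBS];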

* Re: [PATCH 1/5] drm/amdgpu: add VMHUB to ring association
       [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-04-05 16:22   ` [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes Christian König
@ 2017-04-05 18:48   ` Alex Deucher
  4 siblings, 0 replies; 10+ messages in thread
From: Alex Deucher @ 2017-04-05 18:48 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Apr 5, 2017 at 12:21 PM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Add the info about which ring belongs to which VMHUB.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 3 +++
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 1 +
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c    | 2 ++
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c    | 1 +
>  5 files changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 7479e47..45bb87b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -99,6 +99,7 @@ struct amdgpu_ring_funcs {
>         uint32_t                align_mask;
>         u32                     nop;
>         bool                    support_64bit_ptrs;
> +       unsigned                vmhub;
>
>         /* ring read/write ptr handling */
>         u64 (*get_rptr)(struct amdgpu_ring *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index a967879..1cc006a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3673,6 +3673,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>         .align_mask = 0xff,
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
> +       .vmhub = AMDGPU_GFXHUB,
>         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> @@ -3717,6 +3718,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>         .align_mask = 0xff,
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
> +       .vmhub = AMDGPU_GFXHUB,
>         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
>         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
>         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> @@ -3746,6 +3748,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>         .align_mask = 0xff,
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
> +       .vmhub = AMDGPU_GFXHUB,
>         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
>         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
>         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d40eb31..8cbb49d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1473,6 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
>         .align_mask = 0xf,
>         .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
>         .support_64bit_ptrs = true,
> +       .vmhub = AMDGPU_MMHUB,
>         .get_rptr = sdma_v4_0_ring_get_rptr,
>         .get_wptr = sdma_v4_0_ring_get_wptr,
>         .set_wptr = sdma_v4_0_ring_set_wptr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index 819148a..fa80465 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -1448,6 +1448,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
>         .align_mask = 0xf,
>         .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
>         .support_64bit_ptrs = false,
> +       .vmhub = AMDGPU_MMHUB,
>         .get_rptr = uvd_v7_0_ring_get_rptr,
>         .get_wptr = uvd_v7_0_ring_get_wptr,
>         .set_wptr = uvd_v7_0_ring_set_wptr,
> @@ -1475,6 +1476,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
>         .align_mask = 0x3f,
>         .nop = HEVC_ENC_CMD_NO_OP,
>         .support_64bit_ptrs = false,
> +       .vmhub = AMDGPU_MMHUB,
>         .get_rptr = uvd_v7_0_enc_ring_get_rptr,
>         .get_wptr = uvd_v7_0_enc_ring_get_wptr,
>         .set_wptr = uvd_v7_0_enc_ring_set_wptr,
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 8dde83f..6374133 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -1074,6 +1074,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
>         .align_mask = 0x3f,
>         .nop = VCE_CMD_NO_OP,
>         .support_64bit_ptrs = false,
> +       .vmhub = AMDGPU_MMHUB,
>         .get_rptr = vce_v4_0_ring_get_rptr,
>         .get_wptr = vce_v4_0_ring_get_wptr,
>         .set_wptr = vce_v4_0_ring_set_wptr,
> --
> 2.5.0
>
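
For reference, the new funcs->vmhub field is what the follow-up patches key the flush code off; combining the lookups used later in the series, a typical consumer ends up looking roughly like this sketch:

/* Sketch of how a ring-level flush picks its hub and invalidation engine
 * once the later patches in this series are applied. */
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
unsigned eng = ring->vm_inv_eng;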

* Re: [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually
       [not found]     ` <1491409320-2448-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-04-05 18:51       ` Alex Deucher
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Deucher @ 2017-04-05 18:51 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Apr 5, 2017 at 12:21 PM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> For Vega10 we have 18 VM invalidation engines for each VMHUB.
>
> Start to assign them manually to the rings.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 12 ++++++++++++
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c    |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c    |  2 +-
>  6 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 45bb87b..5786cc3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -179,6 +179,7 @@ struct amdgpu_ring {
>         unsigned                cond_exe_offs;
>         u64                     cond_exe_gpu_addr;
>         volatile u32            *cond_exe_cpu_addr;
> +       unsigned                vm_inv_eng;
>  #if defined(CONFIG_DEBUG_FS)
>         struct dentry *ent;
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index dce2950..79bfbbe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3150,7 +3150,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>         struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB];
>         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> -       unsigned eng = ring->idx;
> +       unsigned eng = ring->vm_inv_eng;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index e1637d5..4f6000b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -386,6 +386,18 @@ static int gmc_v9_0_early_init(void *handle)
>  static int gmc_v9_0_late_init(void *handle)
>  {
>         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> +       unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 0 };
> +       unsigned i;
> +
> +       for (i = 0; i < adev->num_rings; ++i) {
> +               struct amdgpu_ring *ring = adev->rings[i];
> +               unsigned vmhub = ring->funcs->vmhub;
> +
> +               ring->vm_inv_eng = vm_inv_eng[vmhub]++;
> +               dev_info(adev->dev, "ring %u uses VM inv eng %u on hub %u\n",
> +                        ring->idx, ring->vm_inv_eng, ring->funcs->vmhub);
> +       }
> +
>         return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 06826a0..90440e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1041,7 +1041,7 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  {
>         struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> -       unsigned eng = ring->idx;
> +       unsigned eng = ring->vm_inv_eng;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index 772c0f2..cc4f8f4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -1037,7 +1037,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>         struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
>         uint32_t data0, data1, mask;
> -       unsigned eng = ring->idx;
> +       unsigned eng = ring->vm_inv_eng;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
> @@ -1078,7 +1078,7 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  {
>         struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> -       unsigned eng = ring->idx;
> +       unsigned eng = ring->vm_inv_eng;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 5e4f243..66474e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -975,7 +975,7 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
>  {
>         struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB];
>         uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
> -       unsigned eng = ring->idx;
> +       unsigned eng = ring->vm_inv_eng;
>
>         pd_addr = pd_addr | 0x1; /* valid bit */
>         /* now only use physical base address of PDE and valid */
> --
> 2.5.0
>
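
To make the counter scheme in gmc_v9_0_late_init() concrete, here is a small standalone illustration; the ring-to-hub mapping below is an arbitrary example, not the actual Vega10 ring list:

/* Standalone illustration of the per-hub round-robin assignment above;
 * each hub hands out engine numbers independently, starting at 0. */
#include <stdio.h>

enum { GFXHUB, MMHUB, MAX_VMHUBS };

int main(void)
{
	const unsigned ring_hub[] = { GFXHUB, GFXHUB, GFXHUB,
				      MMHUB, MMHUB, MMHUB };
	unsigned vm_inv_eng[MAX_VMHUBS] = { 0 };
	unsigned i;

	for (i = 0; i < sizeof(ring_hub) / sizeof(ring_hub[0]); ++i)
		printf("ring %u uses VM inv eng %u on hub %u\n",
		       i, vm_inv_eng[ring_hub[i]]++, ring_hub[i]);
	return 0;
}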

* Re: [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes
       [not found]     ` <1491409320-2448-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-04-05 19:07       ` Alex Deucher
  0 siblings, 0 replies; 10+ messages in thread
From: Alex Deucher @ 2017-04-05 19:07 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Apr 5, 2017 at 12:22 PM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Enable concurrent VM flushes for Vega10.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Acked-by: Alex Deucher <alexander.deucher@amd.com>


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 51 +++++++++++++++++++---------------
>  1 file changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 6fd1952..1bb2f8a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -462,11 +462,12 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>         }
>         kfree(fences);
>
> -       job->vm_needs_flush = true;
> +       job->vm_needs_flush = false;
>         /* Check if we can use a VMID already assigned to this VM */
>         i = ring->idx;
>         do {
>                 struct fence *flushed;
> +               bool needs_flush = false;
>
>                 id = vm->ids[i++];
>                 if (i == AMDGPU_MAX_RINGS)
> @@ -484,16 +485,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>                 if (job->vm_pd_addr != id->pd_gpu_addr[vmhub])
>                         continue;
>
> -               if (!id->last_flush[vmhub])
> -                       continue;
> -
> -               if (id->last_flush[vmhub]->context != fence_context &&
> -                   !fence_is_signaled(id->last_flush[vmhub]))
> -                       continue;
> +               if (!id->last_flush[vmhub] ||
> +                   (id->last_flush[vmhub]->context != fence_context &&
> +                    !fence_is_signaled(id->last_flush[vmhub])))
> +                       needs_flush = true;
>
>                 flushed  = id->flushed_updates[vmhub];
> -               if (updates &&
> -                   (!flushed || fence_is_later(updates, flushed)))
> +               if (updates && (!flushed || fence_is_later(updates, flushed)))
> +                       needs_flush = true;
> +
> +               /* Concurrent flushes are only possible starting with Vega10 */
> +               if (adev->asic_type < CHIP_VEGA10 && needs_flush)
>                         continue;
>
>                 /* Good we can use this VMID. Remember this submission as
> @@ -503,15 +505,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>                 if (r)
>                         goto error;
>
> -               list_move_tail(&id->list, &adev->vm_manager.ids_lru);
> -               vm->ids[ring->idx] = id;
> -
> -               job->vm_id = id - adev->vm_manager.ids;
> -               job->vm_needs_flush = false;
> -               trace_amdgpu_vm_grab_id(vm, ring->idx, job);
> +               if (updates && (!flushed || fence_is_later(updates, flushed))) {
> +                       fence_put(id->flushed_updates[vmhub]);
> +                       id->flushed_updates[vmhub] = fence_get(updates);
> +               }
>
> -               mutex_unlock(&adev->vm_manager.lock);
> -               return 0;
> +               if (needs_flush)
> +                       goto needs_flush;
> +               else
> +                       goto no_flush_needed;
>
>         } while (i != ring->idx);
>
> @@ -523,18 +525,21 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>         if (r)
>                 goto error;
>
> +       id->pd_gpu_addr[vmhub] = job->vm_pd_addr;
> +       fence_put(id->flushed_updates[vmhub]);
> +       id->flushed_updates[vmhub] = fence_get(updates);
> +       id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
> +       atomic64_set(&id->owner, vm->client_id);
> +
> +needs_flush:
> +       job->vm_needs_flush = true;
>         for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
>                 fence_put(id->last_flush[i]);
>                 id->last_flush[i] = NULL;
>         }
>
> -       fence_put(id->flushed_updates[vmhub]);
> -       id->flushed_updates[vmhub] = fence_get(updates);
> -
> -       id->pd_gpu_addr[vmhub] = job->vm_pd_addr;
> -       id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
> +no_flush_needed:
>         list_move_tail(&id->list, &adev->vm_manager.ids_lru);
> -       atomic64_set(&id->owner, vm->client_id);
>         vm->ids[ring->idx] = id;
>
>         job->vm_id = id - adev->vm_manager.ids;
> --
> 2.5.0
>
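
Condensed, the reuse decision after this patch comes down to the check sketched below; names follow the diff above, and the locking, fence and LRU handling are left out:

/* Rough sketch of the VMID-reuse check in amdgpu_vm_grab_id() after this
 * patch; everything except the flush decision is omitted. */
static bool vmid_reusable_sketch(bool flush_pending, bool updates_newer,
				 bool is_vega10_or_later)
{
	bool needs_flush = flush_pending || updates_newer;

	/* Concurrent flushes are only possible starting with Vega10, so
	 * older ASICs must not reuse an ID that still needs a flush. */
	return !(needs_flush && !is_vega10_or_later);
}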

Thread overview: 10+ messages
2017-04-05 16:21 [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Christian König
     [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-05 16:21   ` [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub Christian König
     [not found]     ` <1491409320-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-05 18:47       ` Alex Deucher
2017-04-05 16:21   ` [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB Christian König
     [not found]     ` <1491409320-2448-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-05 18:42       ` Alex Deucher
2017-04-05 16:21   ` [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually Christian König
     [not found]     ` <1491409320-2448-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-05 18:51       ` Alex Deucher
2017-04-05 16:22   ` [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes Christian König
     [not found]     ` <1491409320-2448-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-05 19:07       ` Alex Deucher
2017-04-05 18:48   ` [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Alex Deucher
