* [PATCH 1/5] drm/amdgpu: add VMHUB to ring association @ 2017-04-05 16:21 Christian König [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 0 siblings, 1 reply; 10+ messages in thread From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> Add the info which ring belonging to which VMHUB. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 1 + 5 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7479e47..45bb87b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -99,6 +99,7 @@ struct amdgpu_ring_funcs { uint32_t align_mask; u32 nop; bool support_64bit_ptrs; + unsigned vmhub; /* ring read/write ptr handling */ u64 (*get_rptr)(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a967879..1cc006a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3673,6 +3673,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -3717,6 +3718,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = 
gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -3746,6 +3748,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index d40eb31..8cbb49d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1473,6 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 819148a..fa80465 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1448,6 +1448,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .align_mask = 0xf, .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, @@ -1475,6 +1476,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 8dde83f..6374133 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1074,6 
+1074,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, -- 2.5.0 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 10+ messages in thread
[parent not found: <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-04-05 16:21 ` Christian König [not found] ` <1491409320-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 16:21 ` [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB Christian König ` (3 subsequent siblings) 4 siblings, 1 reply; 10+ messages in thread From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> Rather inefficient, but this way we only need to flush the current hub. I wonder if we shouldn't make nails with heads and separate the VMID ranges completely. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 ++++++++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++--- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8785420..6fd1952 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -406,6 +406,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; uint64_t fence_context = adev->fence_context + ring->idx; struct fence *updates = sync->last_vm_update; struct amdgpu_vm_id *id, *idle; @@ -480,17 +481,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (atomic64_read(&id->owner) != vm->client_id) continue; - if (job->vm_pd_addr != id->pd_gpu_addr) + if (job->vm_pd_addr != id->pd_gpu_addr[vmhub]) continue; - if (!id->last_flush) + if (!id->last_flush[vmhub]) continue; - if (id->last_flush->context != fence_context && - !fence_is_signaled(id->last_flush)) + if 
(id->last_flush[vmhub]->context != fence_context && + !fence_is_signaled(id->last_flush[vmhub])) continue; - flushed = id->flushed_updates; + flushed = id->flushed_updates[vmhub]; if (updates && (!flushed || fence_is_later(updates, flushed))) continue; @@ -522,13 +523,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - fence_put(id->last_flush); - id->last_flush = NULL; + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + fence_put(id->last_flush[i]); + id->last_flush[i] = NULL; + } - fence_put(id->flushed_updates); - id->flushed_updates = fence_get(updates); + fence_put(id->flushed_updates[vmhub]); + id->flushed_updates[vmhub] = fence_get(updates); - id->pd_gpu_addr = job->vm_pd_addr; + id->pd_gpu_addr[vmhub] = job->vm_pd_addr; id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); list_move_tail(&id->list, &adev->vm_manager.ids_lru); atomic64_set(&id->owner, vm->client_id); @@ -591,6 +594,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id]; + unsigned vmhub = ring->funcs->vmhub; bool gds_switch_needed = ring->funcs->emit_gds_switch && ( id->gds_base != job->gds_base || id->gds_size != job->gds_size || @@ -629,8 +633,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) return r; mutex_lock(&adev->vm_manager.lock); - fence_put(id->last_flush); - id->last_flush = fence; + fence_put(id->last_flush[vmhub]); + id->last_flush[vmhub] = fence; mutex_unlock(&adev->vm_manager.lock); } @@ -2234,13 +2238,15 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { - unsigned i; + unsigned i, j; for (i = 0; i < AMDGPU_NUM_VM; ++i) { struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; amdgpu_sync_free(&adev->vm_manager.ids[i].active); - fence_put(id->flushed_updates); - fence_put(id->last_flush); + for (j = 0; j < 
AMDGPU_MAX_VMHUBS; ++j) { + fence_put(id->flushed_updates[j]); + fence_put(id->last_flush[j]); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 7d01372..d61dd83 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -132,12 +132,12 @@ struct amdgpu_vm { struct amdgpu_vm_id { struct list_head list; struct amdgpu_sync active; - struct fence *last_flush; + struct fence *last_flush[AMDGPU_MAX_VMHUBS]; atomic64_t owner; - uint64_t pd_gpu_addr; + uint64_t pd_gpu_addr[AMDGPU_MAX_VMHUBS]; /* last flushed PD/PT update */ - struct fence *flushed_updates; + struct fence *flushed_updates[AMDGPU_MAX_VMHUBS]; uint32_t current_gpu_reset_count; -- 2.5.0 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 10+ messages in thread
[parent not found: <1491409320-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub [not found] ` <1491409320-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-04-05 18:47 ` Alex Deucher 0 siblings, 0 replies; 10+ messages in thread From: Alex Deucher @ 2017-04-05 18:47 UTC (permalink / raw) To: Christian König; +Cc: amd-gfx list On Wed, Apr 5, 2017 at 12:21 PM, Christian König <deathsimple@vodafone.de> wrote: > From: Christian König <christian.koenig@amd.com> > > Rather inefficient, but this way we only need to flush the current hub. > > I wonder if we shouldn't make nails with heads and separate the VMID ranges completely. > > Signed-off-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 ++++++++++++++++++++-------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++--- > 2 files changed, 24 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 8785420..6fd1952 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -406,6 +406,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > struct amdgpu_job *job) > { > struct amdgpu_device *adev = ring->adev; > + unsigned vmhub = ring->funcs->vmhub; > uint64_t fence_context = adev->fence_context + ring->idx; > struct fence *updates = sync->last_vm_update; > struct amdgpu_vm_id *id, *idle; > @@ -480,17 +481,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > if (atomic64_read(&id->owner) != vm->client_id) > continue; > > - if (job->vm_pd_addr != id->pd_gpu_addr) > + if (job->vm_pd_addr != id->pd_gpu_addr[vmhub]) > continue; > > - if (!id->last_flush) > + if (!id->last_flush[vmhub]) > continue; > > - if (id->last_flush->context != fence_context && > - !fence_is_signaled(id->last_flush)) > + if (id->last_flush[vmhub]->context != fence_context && > + 
!fence_is_signaled(id->last_flush[vmhub])) > continue; > > - flushed = id->flushed_updates; > + flushed = id->flushed_updates[vmhub]; > if (updates && > (!flushed || fence_is_later(updates, flushed))) > continue; > @@ -522,13 +523,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > if (r) > goto error; > > - fence_put(id->last_flush); > - id->last_flush = NULL; > + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { Would it be worth storing the number of vm_hubs per chip and using that as the limit? That way we wouldn't loop multiple times for older asics with only one hub. Alex > + fence_put(id->last_flush[i]); > + id->last_flush[i] = NULL; > + } > > - fence_put(id->flushed_updates); > - id->flushed_updates = fence_get(updates); > + fence_put(id->flushed_updates[vmhub]); > + id->flushed_updates[vmhub] = fence_get(updates); > > - id->pd_gpu_addr = job->vm_pd_addr; > + id->pd_gpu_addr[vmhub] = job->vm_pd_addr; > id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); > list_move_tail(&id->list, &adev->vm_manager.ids_lru); > atomic64_set(&id->owner, vm->client_id); > @@ -591,6 +594,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) > { > struct amdgpu_device *adev = ring->adev; > struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id]; > + unsigned vmhub = ring->funcs->vmhub; > bool gds_switch_needed = ring->funcs->emit_gds_switch && ( > id->gds_base != job->gds_base || > id->gds_size != job->gds_size || > @@ -629,8 +633,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) > return r; > > mutex_lock(&adev->vm_manager.lock); > - fence_put(id->last_flush); > - id->last_flush = fence; > + fence_put(id->last_flush[vmhub]); > + id->last_flush[vmhub] = fence; > mutex_unlock(&adev->vm_manager.lock); > } > > @@ -2234,13 +2238,15 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) > */ > void amdgpu_vm_manager_fini(struct amdgpu_device *adev) > { > - unsigned i; > + unsigned i, j; > > for 
(i = 0; i < AMDGPU_NUM_VM; ++i) { > struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; > > amdgpu_sync_free(&adev->vm_manager.ids[i].active); > - fence_put(id->flushed_updates); > - fence_put(id->last_flush); > + for (j = 0; j < AMDGPU_MAX_VMHUBS; ++j) { > + fence_put(id->flushed_updates[j]); > + fence_put(id->last_flush[j]); > + } > } > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > index 7d01372..d61dd83 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > @@ -132,12 +132,12 @@ struct amdgpu_vm { > struct amdgpu_vm_id { > struct list_head list; > struct amdgpu_sync active; > - struct fence *last_flush; > + struct fence *last_flush[AMDGPU_MAX_VMHUBS]; > atomic64_t owner; > > - uint64_t pd_gpu_addr; > + uint64_t pd_gpu_addr[AMDGPU_MAX_VMHUBS]; > /* last flushed PD/PT update */ > - struct fence *flushed_updates; > + struct fence *flushed_updates[AMDGPU_MAX_VMHUBS]; > > uint32_t current_gpu_reset_count; > > -- > 2.5.0 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 16:21 ` [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub Christian König @ 2017-04-05 16:21 ` Christian König [not found] ` <1491409320-2448-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 16:21 ` [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually Christian König ` (2 subsequent siblings) 4 siblings, 1 reply; 10+ messages in thread From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> Drop invalidating both hubs from each engine. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 +++++------ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 60 +++++++++--------- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 111 +++++++++++++++------------------ drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 57 ++++++++--------- 4 files changed, 118 insertions(+), 146 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1cc006a..dce2950 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3147,35 +3147,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB]; int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; - unsigned i; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = 
&ring->adev->vmhub[i]; - - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_lo32 - + (2 * vm_id), - lower_32_bits(pd_addr)); + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_lo32 + (2 * vm_id), + lower_32_bits(pd_addr)); - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_hi32 - + (2 * vm_id), - upper_32_bits(pd_addr)); + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_hi32 + (2 * vm_id), + upper_32_bits(pd_addr)); - gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->vm_inv_eng0_req + eng, req); + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->vm_inv_eng0_req + eng, req); - /* wait for the invalidate to complete */ - gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + - eng, 0, 1 << vm_id, 1 << vm_id, 0x20); - } + /* wait for the invalidate to complete */ + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + + eng, 0, 1 << vm_id, 1 << vm_id, 0x20); /* compute doesn't have PFP */ if (usepfp) { @@ -3680,7 +3674,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5 + /* COND_EXEC */ 7 + /* PIPELINE_SYNC */ - 46 + /* VM_FLUSH */ + 24 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -3727,7 +3721,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ - 64 + /* gfx_v9_0_ring_emit_vm_flush */ + 24 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, @@ -3757,7 +3751,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync 
*/ - 64 + /* gfx_v9_0_ring_emit_vm_flush */ + 24 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 8cbb49d..06826a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1039,44 +1039,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; - unsigned i; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - /* flush TLB */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 
2); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); /* reference */ - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); - } + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 1 << vm_id); /* reference */ + amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } static int sdma_v4_0_early_init(void *handle) @@ -1481,7 +1477,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { 6 + /* sdma_v4_0_ring_emit_hdp_flush */ 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - 36 + /* sdma_v4_0_ring_emit_vm_flush */ + 18 + /* sdma_v4_0_ring_emit_vm_flush */ 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ .emit_ib = sdma_v4_0_ring_emit_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index fa80465..772c0f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1034,42 +1034,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); uint32_t data0, data1, mask; unsigned eng = ring->idx; - unsigned i; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; - data1 = upper_32_bits(pd_addr); - uvd_v7_0_vm_reg_write(ring, data0, data1); - - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; - data1 = lower_32_bits(pd_addr); - uvd_v7_0_vm_reg_write(ring, data0, data1); - - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; - data1 = lower_32_bits(pd_addr); - mask = 0xffffffff; - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); - - /* flush TLB */ - data0 = (hub->vm_inv_eng0_req + eng) << 2; - data1 = req; - uvd_v7_0_vm_reg_write(ring, data0, data1); - - /* wait for flush */ - data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vm_id; - mask = 1 << vm_id; - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); - } + data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data1 = upper_32_bits(pd_addr); + uvd_v7_0_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + uvd_v7_0_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); + + /* flush TLB */ + data0 = (hub->vm_inv_eng0_req + eng) << 2; 
+ data1 = req; + uvd_v7_0_vm_reg_write(ring, data0, data1); + + /* wait for flush */ + data0 = (hub->vm_inv_eng0_ack + eng) << 2; + data1 = 1 << vm_id; + mask = 1 << vm_id; + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); } static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) @@ -1080,44 +1076,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; - unsigned i; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - /* flush TLB */ - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); - } + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + 
amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); } #if 0 @@ -1455,7 +1444,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .emit_frame_size = 2 + /* uvd_v7_0_ring_emit_hdp_flush */ 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ - 34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */ + 34 + /* uvd_v7_0_ring_emit_vm_flush */ 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ .emit_ib = uvd_v7_0_ring_emit_ib, @@ -1481,7 +1470,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .get_wptr = uvd_v7_0_enc_ring_get_wptr, .set_wptr = uvd_v7_0_enc_ring_set_wptr, .emit_frame_size = - 17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */ + 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */ 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ 1, /* uvd_v7_0_enc_ring_insert_end */ .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 6374133..5e4f243 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -973,44 +973,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) static void 
vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->idx; - unsigned i; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - /* flush TLB */ - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); - } + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + 
amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); } static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1080,7 +1073,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .set_wptr = vce_v4_0_ring_set_wptr, .parse_cs = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = - 17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */ + 17 + /* vce_v4_0_emit_vm_flush */ 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1, /* vce_v4_0_ring_insert_end */ .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ -- 2.5.0 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 10+ messages in thread
[parent not found: <1491409320-2448-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB [not found] ` <1491409320-2448-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-04-05 18:42 ` Alex Deucher 0 siblings, 0 replies; 10+ messages in thread From: Alex Deucher @ 2017-04-05 18:42 UTC (permalink / raw) To: Christian König; +Cc: amd-gfx list On Wed, Apr 5, 2017 at 12:21 PM, Christian König <deathsimple@vodafone.de> wrote: > From: Christian König <christian.koenig@amd.com> > > Drop invalidating both hubs from each engine. > > Signed-off-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 +++++------ > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 60 +++++++++--------- > drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 111 +++++++++++++++------------------ > drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 57 ++++++++--------- > 4 files changed, 118 insertions(+), 146 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 1cc006a..dce2950 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -3147,35 +3147,29 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) > static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, > unsigned vm_id, uint64_t pd_addr) > { > + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB]; Should use ring->vmhub here rather than hardcoding AMDGPU_GFXHUB/AMDGPU_MMHUB? Same question for all the other IP below. 
With that fixed: Reviewed-by: Alex Deucher <alexander.deucher@amd.com> > int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > unsigned eng = ring->idx; > - unsigned i; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > BUG_ON(pd_addr & 0xFFFF00000000003EULL); > > - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { > - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; > - > - gfx_v9_0_write_data_to_reg(ring, usepfp, true, > - hub->ctx0_ptb_addr_lo32 > - + (2 * vm_id), > - lower_32_bits(pd_addr)); > + gfx_v9_0_write_data_to_reg(ring, usepfp, true, > + hub->ctx0_ptb_addr_lo32 + (2 * vm_id), > + lower_32_bits(pd_addr)); > > - gfx_v9_0_write_data_to_reg(ring, usepfp, true, > - hub->ctx0_ptb_addr_hi32 > - + (2 * vm_id), > - upper_32_bits(pd_addr)); > + gfx_v9_0_write_data_to_reg(ring, usepfp, true, > + hub->ctx0_ptb_addr_hi32 + (2 * vm_id), > + upper_32_bits(pd_addr)); > > - gfx_v9_0_write_data_to_reg(ring, usepfp, true, > - hub->vm_inv_eng0_req + eng, req); > + gfx_v9_0_write_data_to_reg(ring, usepfp, true, > + hub->vm_inv_eng0_req + eng, req); > > - /* wait for the invalidate to complete */ > - gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + > - eng, 0, 1 << vm_id, 1 << vm_id, 0x20); > - } > + /* wait for the invalidate to complete */ > + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + > + eng, 0, 1 << vm_id, 1 << vm_id, 0x20); > > /* compute doesn't have PFP */ > if (usepfp) { > @@ -3680,7 +3674,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > .emit_frame_size = /* totally 242 maximum if 16 IBs */ > 5 + /* COND_EXEC */ > 7 + /* PIPELINE_SYNC */ > - 46 + /* VM_FLUSH */ > + 24 + /* VM_FLUSH */ > 8 + /* FENCE for VM_FLUSH */ > 20 + /* GDS switch */ > 4 + /* double SWITCH_BUFFER, > @@ -3727,7 +3721,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { > 7 + /* 
gfx_v9_0_ring_emit_hdp_flush */ > 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ > 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ > - 64 + /* gfx_v9_0_ring_emit_vm_flush */ > + 24 + /* gfx_v9_0_ring_emit_vm_flush */ > 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ > .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ > .emit_ib = gfx_v9_0_ring_emit_ib_compute, > @@ -3757,7 +3751,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { > 7 + /* gfx_v9_0_ring_emit_hdp_flush */ > 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ > 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ > - 64 + /* gfx_v9_0_ring_emit_vm_flush */ > + 24 + /* gfx_v9_0_ring_emit_vm_flush */ > 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ > .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ > .emit_ib = gfx_v9_0_ring_emit_ib_compute, > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index 8cbb49d..06826a0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -1039,44 +1039,40 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) > static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, > unsigned vm_id, uint64_t pd_addr) > { > + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > unsigned eng = ring->idx; > - unsigned i; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > BUG_ON(pd_addr & 0xFFFF00000000003EULL); > > - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { > - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; > - > - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | > - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); > - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); > - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > - > - amdgpu_ring_write(ring, 
SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | > - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); > - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); > - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); > - > - /* flush TLB */ > - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | > - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); > - amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); > - amdgpu_ring_write(ring, req); > - > - /* wait for flush */ > - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | > - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | > - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ > - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, 1 << vm_id); /* reference */ > - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ > - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | > - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); > - } > + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | > + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); > + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); > + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > + > + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | > + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); > + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); > + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); > + > + /* flush TLB */ > + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | > + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); > + amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); > + amdgpu_ring_write(ring, req); > + > + /* wait for flush */ > + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | > + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | > + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ > + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); > + amdgpu_ring_write(ring, 0); > + amdgpu_ring_write(ring, 1 << 
vm_id); /* reference */ > + amdgpu_ring_write(ring, 1 << vm_id); /* mask */ > + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | > + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); > } > > static int sdma_v4_0_early_init(void *handle) > @@ -1481,7 +1477,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { > 6 + /* sdma_v4_0_ring_emit_hdp_flush */ > 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ > 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ > - 36 + /* sdma_v4_0_ring_emit_vm_flush */ > + 18 + /* sdma_v4_0_ring_emit_vm_flush */ > 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ > .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ > .emit_ib = sdma_v4_0_ring_emit_ib, > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > index fa80465..772c0f2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > @@ -1034,42 +1034,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, > static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, > unsigned vm_id, uint64_t pd_addr) > { > + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > uint32_t data0, data1, mask; > unsigned eng = ring->idx; > - unsigned i; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > BUG_ON(pd_addr & 0xFFFF00000000003EULL); > > - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { > - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; > - > - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; > - data1 = upper_32_bits(pd_addr); > - uvd_v7_0_vm_reg_write(ring, data0, data1); > - > - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; > - data1 = lower_32_bits(pd_addr); > - uvd_v7_0_vm_reg_write(ring, data0, data1); > - > - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; > - data1 = lower_32_bits(pd_addr); > - mask = 
0xffffffff; > - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); > - > - /* flush TLB */ > - data0 = (hub->vm_inv_eng0_req + eng) << 2; > - data1 = req; > - uvd_v7_0_vm_reg_write(ring, data0, data1); > - > - /* wait for flush */ > - data0 = (hub->vm_inv_eng0_ack + eng) << 2; > - data1 = 1 << vm_id; > - mask = 1 << vm_id; > - uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); > - } > + data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; > + data1 = upper_32_bits(pd_addr); > + uvd_v7_0_vm_reg_write(ring, data0, data1); > + > + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; > + data1 = lower_32_bits(pd_addr); > + uvd_v7_0_vm_reg_write(ring, data0, data1); > + > + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; > + data1 = lower_32_bits(pd_addr); > + mask = 0xffffffff; > + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); > + > + /* flush TLB */ > + data0 = (hub->vm_inv_eng0_req + eng) << 2; > + data1 = req; > + uvd_v7_0_vm_reg_write(ring, data0, data1); > + > + /* wait for flush */ > + data0 = (hub->vm_inv_eng0_ack + eng) << 2; > + data1 = 1 << vm_id; > + mask = 1 << vm_id; > + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); > } > > static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) > @@ -1080,44 +1076,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) > static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, > unsigned int vm_id, uint64_t pd_addr) > { > + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > unsigned eng = ring->idx; > - unsigned i; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > BUG_ON(pd_addr & 0xFFFF00000000003EULL); > > - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { > - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; > - > - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); > - amdgpu_ring_write(ring, > - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); > - 
amdgpu_ring_write(ring, upper_32_bits(pd_addr)); > - > - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); > - amdgpu_ring_write(ring, > - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > - > - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); > - amdgpu_ring_write(ring, > - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > - amdgpu_ring_write(ring, 0xffffffff); > - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > - > - /* flush TLB */ > - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); > - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); > - amdgpu_ring_write(ring, req); > - > - /* wait for flush */ > - amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); > - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); > - amdgpu_ring_write(ring, 1 << vm_id); > - amdgpu_ring_write(ring, 1 << vm_id); > - } > + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); > + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); > + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); > + > + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); > + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > + > + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); > + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > + amdgpu_ring_write(ring, 0xffffffff); > + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > + > + /* flush TLB */ > + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); > + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); > + amdgpu_ring_write(ring, req); > + > + /* wait for flush */ > + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); > + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); > + amdgpu_ring_write(ring, 1 << vm_id); > + amdgpu_ring_write(ring, 1 << vm_id); > } > > #if 0 > @@ -1455,7 +1444,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { > .emit_frame_size = > 2 + /* 
uvd_v7_0_ring_emit_hdp_flush */ > 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ > - 34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */ > + 34 + /* uvd_v7_0_ring_emit_vm_flush */ > 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ > .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ > .emit_ib = uvd_v7_0_ring_emit_ib, > @@ -1481,7 +1470,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { > .get_wptr = uvd_v7_0_enc_ring_get_wptr, > .set_wptr = uvd_v7_0_enc_ring_set_wptr, > .emit_frame_size = > - 17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */ > + 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */ > 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ > 1, /* uvd_v7_0_enc_ring_insert_end */ > .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > index 6374133..5e4f243 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > @@ -973,44 +973,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) > static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, > unsigned int vm_id, uint64_t pd_addr) > { > + struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > unsigned eng = ring->idx; > - unsigned i; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > BUG_ON(pd_addr & 0xFFFF00000000003EULL); > > - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { > - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; > - > - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); > - amdgpu_ring_write(ring, > - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); > - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); > - > - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); > - amdgpu_ring_write(ring, > - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > - > - 
amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); > - amdgpu_ring_write(ring, > - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > - amdgpu_ring_write(ring, 0xffffffff); > - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > - > - /* flush TLB */ > - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); > - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); > - amdgpu_ring_write(ring, req); > - > - /* wait for flush */ > - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); > - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); > - amdgpu_ring_write(ring, 1 << vm_id); > - amdgpu_ring_write(ring, 1 << vm_id); > - } > + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); > + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); > + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); > + > + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); > + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > + > + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); > + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); > + amdgpu_ring_write(ring, 0xffffffff); > + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); > + > + /* flush TLB */ > + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); > + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); > + amdgpu_ring_write(ring, req); > + > + /* wait for flush */ > + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); > + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); > + amdgpu_ring_write(ring, 1 << vm_id); > + amdgpu_ring_write(ring, 1 << vm_id); > } > > static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, > @@ -1080,7 +1073,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { > .set_wptr = vce_v4_0_ring_set_wptr, > .parse_cs = amdgpu_vce_ring_parse_cs_vm, > .emit_frame_size = > - 17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */ > + 17 + /* vce_v4_0_emit_vm_flush */ > 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ > 1, /* 
vce_v4_0_ring_insert_end */ > .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ > -- > 2.5.0 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 16:21 ` [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub Christian König 2017-04-05 16:21 ` [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB Christian König @ 2017-04-05 16:21 ` Christian König [not found] ` <1491409320-2448-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 16:22 ` [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes Christian König 2017-04-05 18:48 ` [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Alex Deucher 4 siblings, 1 reply; 10+ messages in thread From: Christian König @ 2017-04-05 16:21 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> For Vega10 we have 18 VM invalidation engines for each VMHUB. Start to assign them manually to the rings. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- 6 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 45bb87b..5786cc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -179,6 +179,7 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned vm_inv_eng; #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index dce2950..79bfbbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3150,7 +3150,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB]; int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e1637d5..4f6000b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -386,6 +386,18 @@ static int gmc_v9_0_early_init(void *handle) static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 0 }; + unsigned i; + + for(i = 0; i < adev->num_rings; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + unsigned vmhub = ring->funcs->vmhub; + + ring->vm_inv_eng = vm_inv_eng[vmhub]++; + dev_info(adev->dev, "ring %u uses VM inv eng %u on hub %u\n", + ring->idx, ring->vm_inv_eng, ring->funcs->vmhub); + } + return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 06826a0..90440e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1041,7 +1041,7 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, { struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 772c0f2..cc4f8f4 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1037,7 +1037,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); uint32_t data0, data1, mask; - unsigned eng = ring->idx; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ @@ -1078,7 +1078,7 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, { struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 5e4f243..66474e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -975,7 +975,7 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, { struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ -- 2.5.0 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 10+ messages in thread
[parent not found: <1491409320-2448-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually [not found] ` <1491409320-2448-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-04-05 18:51 ` Alex Deucher 0 siblings, 0 replies; 10+ messages in thread From: Alex Deucher @ 2017-04-05 18:51 UTC (permalink / raw) To: Christian König; +Cc: amd-gfx list On Wed, Apr 5, 2017 at 12:21 PM, Christian König <deathsimple@vodafone.de> wrote: > From: Christian König <christian.koenig@amd.com> > > For Vega10 we have 18 VM invalidation engines for each VMHUB. > > Start to assign them manually to the rings. > > Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++++++++ > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- > drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- > 6 files changed, 18 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 45bb87b..5786cc3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -179,6 +179,7 @@ struct amdgpu_ring { > unsigned cond_exe_offs; > u64 cond_exe_gpu_addr; > volatile u32 *cond_exe_cpu_addr; > + unsigned vm_inv_eng; > #if defined(CONFIG_DEBUG_FS) > struct dentry *ent; > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index dce2950..79bfbbe 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -3150,7 +3150,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, > struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_GFXHUB]; > int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); > uint32_t req = 
ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > - unsigned eng = ring->idx; > + unsigned eng = ring->vm_inv_eng; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index e1637d5..4f6000b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -386,6 +386,18 @@ static int gmc_v9_0_early_init(void *handle) > static int gmc_v9_0_late_init(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > + unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 0 }; > + unsigned i; > + > + for(i = 0; i < adev->num_rings; ++i) { > + struct amdgpu_ring *ring = adev->rings[i]; > + unsigned vmhub = ring->funcs->vmhub; > + > + ring->vm_inv_eng = vm_inv_eng[vmhub]++; > + dev_info(adev->dev, "ring %u uses VM inv eng %u on hub %u\n", > + ring->idx, ring->vm_inv_eng, ring->funcs->vmhub); > + } > + > return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index 06826a0..90440e0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -1041,7 +1041,7 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, > { > struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > - unsigned eng = ring->idx; > + unsigned eng = ring->vm_inv_eng; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > index 772c0f2..cc4f8f4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > @@ -1037,7 +1037,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, > 
struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > uint32_t data0, data1, mask; > - unsigned eng = ring->idx; > + unsigned eng = ring->vm_inv_eng; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > @@ -1078,7 +1078,7 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, > { > struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > - unsigned eng = ring->idx; > + unsigned eng = ring->vm_inv_eng; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > index 5e4f243..66474e8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > @@ -975,7 +975,7 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, > { > struct amdgpu_vmhub *hub = &ring->adev->vmhub[AMDGPU_MMHUB]; > uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); > - unsigned eng = ring->idx; > + unsigned eng = ring->vm_inv_eng; > > pd_addr = pd_addr | 0x1; /* valid bit */ > /* now only use physical base address of PDE and valid */ > -- > 2.5.0 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> ` (2 preceding siblings ...) 2017-04-05 16:21 ` [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually Christian König @ 2017-04-05 16:22 ` Christian König [not found] ` <1491409320-2448-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 18:48 ` [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Alex Deucher 4 siblings, 1 reply; 10+ messages in thread From: Christian König @ 2017-04-05 16:22 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW From: Christian König <christian.koenig@amd.com> Enable concurrent VM flushes for Vega10. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 51 +++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6fd1952..1bb2f8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -462,11 +462,12 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } kfree(fences); - job->vm_needs_flush = true; + job->vm_needs_flush = false; /* Check if we can use a VMID already assigned to this VM */ i = ring->idx; do { struct fence *flushed; + bool needs_flush = false; id = vm->ids[i++]; if (i == AMDGPU_MAX_RINGS) @@ -484,16 +485,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (job->vm_pd_addr != id->pd_gpu_addr[vmhub]) continue; - if (!id->last_flush[vmhub]) - continue; - - if (id->last_flush[vmhub]->context != fence_context && - !fence_is_signaled(id->last_flush[vmhub])) - continue; + if (!id->last_flush[vmhub] || + (id->last_flush[vmhub]->context != fence_context && + !fence_is_signaled(id->last_flush[vmhub]))) + needs_flush = true; flushed = id->flushed_updates[vmhub]; 
- if (updates && - (!flushed || fence_is_later(updates, flushed))) + if (updates && (!flushed || fence_is_later(updates, flushed))) + needs_flush = true; + + /* Concurrent flushes are only possible starting with Vega10 */ + if (adev->asic_type < CHIP_VEGA10 && needs_flush) continue; /* Good we can use this VMID. Remember this submission as @@ -503,15 +505,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - list_move_tail(&id->list, &adev->vm_manager.ids_lru); - vm->ids[ring->idx] = id; - - job->vm_id = id - adev->vm_manager.ids; - job->vm_needs_flush = false; - trace_amdgpu_vm_grab_id(vm, ring->idx, job); + if (updates && (!flushed || fence_is_later(updates, flushed))) { + fence_put(id->flushed_updates[vmhub]); + id->flushed_updates[vmhub] = fence_get(updates); + } - mutex_unlock(&adev->vm_manager.lock); - return 0; + if (needs_flush) + goto needs_flush; + else + goto no_flush_needed; } while (i != ring->idx); @@ -523,18 +525,21 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; + id->pd_gpu_addr[vmhub] = job->vm_pd_addr; + fence_put(id->flushed_updates[vmhub]); + id->flushed_updates[vmhub] = fence_get(updates); + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); + atomic64_set(&id->owner, vm->client_id); + +needs_flush: + job->vm_needs_flush = true; for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { fence_put(id->last_flush[i]); id->last_flush[i] = NULL; } - fence_put(id->flushed_updates[vmhub]); - id->flushed_updates[vmhub] = fence_get(updates); - - id->pd_gpu_addr[vmhub] = job->vm_pd_addr; - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); +no_flush_needed: list_move_tail(&id->list, &adev->vm_manager.ids_lru); - atomic64_set(&id->owner, vm->client_id); vm->ids[ring->idx] = id; job->vm_id = id - adev->vm_manager.ids; -- 2.5.0 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 10+ messages in thread
[parent not found: <1491409320-2448-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes [not found] ` <1491409320-2448-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-04-05 19:07 ` Alex Deucher 0 siblings, 0 replies; 10+ messages in thread From: Alex Deucher @ 2017-04-05 19:07 UTC (permalink / raw) To: Christian König; +Cc: amd-gfx list On Wed, Apr 5, 2017 at 12:22 PM, Christian König <deathsimple@vodafone.de> wrote: > From: Christian König <christian.koenig@amd.com> > > Enable concurrent VM flushes for Vega10. > > Signed-off-by: Christian König <christian.koenig@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 51 +++++++++++++++++++--------------- > 1 file changed, 28 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 6fd1952..1bb2f8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -462,11 +462,12 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > } > kfree(fences); > > - job->vm_needs_flush = true; > + job->vm_needs_flush = false; > /* Check if we can use a VMID already assigned to this VM */ > i = ring->idx; > do { > struct fence *flushed; > + bool needs_flush = false; > > id = vm->ids[i++]; > if (i == AMDGPU_MAX_RINGS) > @@ -484,16 +485,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > if (job->vm_pd_addr != id->pd_gpu_addr[vmhub]) > continue; > > - if (!id->last_flush[vmhub]) > - continue; > - > - if (id->last_flush[vmhub]->context != fence_context && > - !fence_is_signaled(id->last_flush[vmhub])) > - continue; > + if (!id->last_flush[vmhub] || > + (id->last_flush[vmhub]->context != fence_context && > + !fence_is_signaled(id->last_flush[vmhub]))) > + needs_flush = true; > > flushed = id->flushed_updates[vmhub]; > - if (updates && > - (!flushed || fence_is_later(updates, flushed))) > + if (updates && (!flushed 
|| fence_is_later(updates, flushed))) > + needs_flush = true; > + > + /* Concurrent flushes are only possible starting with Vega10 */ > + if (adev->asic_type < CHIP_VEGA10 && needs_flush) > continue; > > /* Good we can use this VMID. Remember this submission as > @@ -503,15 +505,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > if (r) > goto error; > > - list_move_tail(&id->list, &adev->vm_manager.ids_lru); > - vm->ids[ring->idx] = id; > - > - job->vm_id = id - adev->vm_manager.ids; > - job->vm_needs_flush = false; > - trace_amdgpu_vm_grab_id(vm, ring->idx, job); > + if (updates && (!flushed || fence_is_later(updates, flushed))) { > + fence_put(id->flushed_updates[vmhub]); > + id->flushed_updates[vmhub] = fence_get(updates); > + } > > - mutex_unlock(&adev->vm_manager.lock); > - return 0; > + if (needs_flush) > + goto needs_flush; > + else > + goto no_flush_needed; > > } while (i != ring->idx); > > @@ -523,18 +525,21 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > if (r) > goto error; > > + id->pd_gpu_addr[vmhub] = job->vm_pd_addr; > + fence_put(id->flushed_updates[vmhub]); > + id->flushed_updates[vmhub] = fence_get(updates); > + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); > + atomic64_set(&id->owner, vm->client_id); > + > +needs_flush: > + job->vm_needs_flush = true; > for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { > fence_put(id->last_flush[i]); > id->last_flush[i] = NULL; > } > > - fence_put(id->flushed_updates[vmhub]); > - id->flushed_updates[vmhub] = fence_get(updates); > - > - id->pd_gpu_addr[vmhub] = job->vm_pd_addr; > - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); > +no_flush_needed: > list_move_tail(&id->list, &adev->vm_manager.ids_lru); > - atomic64_set(&id->owner, vm->client_id); > vm->ids[ring->idx] = id; > > job->vm_id = id - adev->vm_manager.ids; > -- > 2.5.0 > > _______________________________________________ > amd-gfx mailing list > 
amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/5] drm/amdgpu: add VMHUB to ring association [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> ` (3 preceding siblings ...) 2017-04-05 16:22 ` [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes Christian König @ 2017-04-05 18:48 ` Alex Deucher 4 siblings, 0 replies; 10+ messages in thread From: Alex Deucher @ 2017-04-05 18:48 UTC (permalink / raw) To: Christian König; +Cc: amd-gfx list On Wed, Apr 5, 2017 at 12:21 PM, Christian König <deathsimple@vodafone.de> wrote: > From: Christian König <christian.koenig@amd.com> > > Add the info which ring belonging to which VMHUB. > > Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 1 + > drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 1 + > 5 files changed, 8 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 7479e47..45bb87b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -99,6 +99,7 @@ struct amdgpu_ring_funcs { > uint32_t align_mask; > u32 nop; > bool support_64bit_ptrs; > + unsigned vmhub; > > /* ring read/write ptr handling */ > u64 (*get_rptr)(struct amdgpu_ring *ring); > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index a967879..1cc006a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -3673,6 +3673,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, > + .vmhub = AMDGPU_GFXHUB, > .get_rptr = gfx_v9_0_ring_get_rptr_gfx, > .get_wptr = gfx_v9_0_ring_get_wptr_gfx, > 
.set_wptr = gfx_v9_0_ring_set_wptr_gfx, > @@ -3717,6 +3718,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, > + .vmhub = AMDGPU_GFXHUB, > .get_rptr = gfx_v9_0_ring_get_rptr_compute, > .get_wptr = gfx_v9_0_ring_get_wptr_compute, > .set_wptr = gfx_v9_0_ring_set_wptr_compute, > @@ -3746,6 +3748,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, > + .vmhub = AMDGPU_GFXHUB, > .get_rptr = gfx_v9_0_ring_get_rptr_compute, > .get_wptr = gfx_v9_0_ring_get_wptr_compute, > .set_wptr = gfx_v9_0_ring_set_wptr_compute, > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index d40eb31..8cbb49d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -1473,6 +1473,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { > .align_mask = 0xf, > .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), > .support_64bit_ptrs = true, > + .vmhub = AMDGPU_MMHUB, > .get_rptr = sdma_v4_0_ring_get_rptr, > .get_wptr = sdma_v4_0_ring_get_wptr, > .set_wptr = sdma_v4_0_ring_set_wptr, > diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > index 819148a..fa80465 100644 > --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c > @@ -1448,6 +1448,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { > .align_mask = 0xf, > .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), > .support_64bit_ptrs = false, > + .vmhub = AMDGPU_MMHUB, > .get_rptr = uvd_v7_0_ring_get_rptr, > .get_wptr = uvd_v7_0_ring_get_wptr, > .set_wptr = uvd_v7_0_ring_set_wptr, > @@ -1475,6 +1476,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { > .align_mask = 0x3f, > .nop = HEVC_ENC_CMD_NO_OP, > 
.support_64bit_ptrs = false, > + .vmhub = AMDGPU_MMHUB, > .get_rptr = uvd_v7_0_enc_ring_get_rptr, > .get_wptr = uvd_v7_0_enc_ring_get_wptr, > .set_wptr = uvd_v7_0_enc_ring_set_wptr, > diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > index 8dde83f..6374133 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c > @@ -1074,6 +1074,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { > .align_mask = 0x3f, > .nop = VCE_CMD_NO_OP, > .support_64bit_ptrs = false, > + .vmhub = AMDGPU_MMHUB, > .get_rptr = vce_v4_0_ring_get_rptr, > .get_wptr = vce_v4_0_ring_get_wptr, > .set_wptr = vce_v4_0_ring_set_wptr, > -- > 2.5.0 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2017-04-05 19:07 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-04-05 16:21 [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Christian König [not found] ` <1491409320-2448-1-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 16:21 ` [PATCH 2/5] drm/amdgpu: separate VMID flush tracking per hub Christian König [not found] ` <1491409320-2448-2-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 18:47 ` Alex Deucher 2017-04-05 16:21 ` [PATCH 3/5] drm/amdgpu: invalidate only the currently needed VMHUB Christian König [not found] ` <1491409320-2448-3-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 18:42 ` Alex Deucher 2017-04-05 16:21 ` [PATCH 4/5] drm/amdgpu: assign VM invalidation engine manually Christian König [not found] ` <1491409320-2448-4-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 18:51 ` Alex Deucher 2017-04-05 16:22 ` [PATCH 5/5] drm/amdgpu: allow concurrent VM flushes Christian König [not found] ` <1491409320-2448-5-git-send-email-deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 2017-04-05 19:07 ` Alex Deucher 2017-04-05 18:48 ` [PATCH 1/5] drm/amdgpu: add VMHUB to ring association Alex Deucher
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.