* [PATCH] drm/amdgpu: fix TLB flushing during eviction @ 2022-03-30 9:00 Christian König 2022-03-30 20:51 ` philip yang 0 siblings, 1 reply; 9+ messages in thread From: Christian König @ 2022-03-30 9:00 UTC (permalink / raw) To: amd-gfx, Philip.Yang, felix.kuehling; +Cc: Christian König Testing the valid bit is not enough to figure out if we need to invalidate the TLB or not. During eviction it is quite likely that we move a BO from VRAM to GTT and update the page tables immediately to the new GTT address. Rework the whole function to get all the necessary parameters directly as value. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 ++++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++---- 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9992a7311387..1cac90ee3318 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, } /** - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table + * amdgpu_vm_update_range - update a range in the vm page table * - * @adev: amdgpu_device pointer of the VM - * @bo_adev: amdgpu_device pointer of the mapped BO - * @vm: requested vm + * @adev: amdgpu_device pointer to use for commands + * @vm: the VM to update the range * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback + * @flush_tlb: trigger tlb invalidation after update completed * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr + * @vram_base: base for vram mappings * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: 
optional resulting fence @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * Fill in the page table entries between @start and @last. * * Returns: - * 0 for success, -EINVAL for failure. + * 0 for success, negative error code for failure. */ -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence) +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence) { struct amdgpu_vm_update_params params; struct amdgpu_vm_tlb_seq_cb *tlb_cb; @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr = bo_adev->vm_manager.vram_base_offset + - cursor.start; + addr = vram_base + cursor.start; } else { addr = 0; } @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(&params, fence); - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) { + if (flush_tlb || params.table_freed) { tlb_cb->vm = vm; if (!fence || !*fence || dma_fence_add_callback(*fence, &tlb_cb->cb, @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; struct dma_fence **last_update; + bool flush_tlb = clear; struct dma_resv *resv; + uint64_t vram_base; uint64_t flags; - struct amdgpu_device *bo_adev = adev; int r; if (clear || !bo) { @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
amdgpu_bo_va *bo_va, } if (bo) { + struct amdgpu_device *bo_adev = adev; + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); if (amdgpu_bo_encrypted(bo)) flags |= AMDGPU_PTE_TMZ; bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + vram_base = bo_adev->vm_manager.vram_base_offset; } else { flags = 0x0; + vram_base = 0; } if (clear || (bo && bo->tbo.base.resv == @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, last_update = &bo_va->last_pt_update; if (!clear && bo_va->base.moved) { - bo_va->base.moved = false; + flush_tlb = true; list_splice_init(&bo_va->valids, &bo_va->invalids); } else if (bo_va->cleared != clear) { @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - resv, mapping->start, - mapping->last, update_flags, - mapping->offset, mem, - pages_addr, last_update); + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, + resv, mapping->start, mapping->last, + update_flags, mapping->offset, + vram_base, mem, pages_addr, + last_update); if (r) return r; } @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, list_splice_init(&bo_va->invalids, &bo_va->valids); bo_va->cleared = clear; + bo_va->base.moved = false; if (trace_amdgpu_vm_bo_mapping_enabled()) { list_for_each_entry(mapping, &bo_va->valids, list) @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, - resv, mapping->start, - mapping->last, init_pte_value, - 0, NULL, NULL, &f); + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, + mapping->start, mapping->last, + init_pte_value, 0, 0, NULL, NULL, + &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { 
dma_fence_put(f); @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, - addr, flags, value, NULL, NULL, NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, + addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6b7682fe84f8..1a814fbffff8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence); +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 27533f6057e0..907b02045824 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("[0x%llx 0x%llx]\n", start, last); - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL, - start, last, 
init_pte_value, 0, - NULL, NULL, fence); + return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start, + last, init_pte_value, 0, 0, NULL, NULL, + fence); } static int @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, pte_flags); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - NULL, last_start, - prange->start + i, pte_flags, - last_start - prange->start, - NULL, dma_addr, - &vm->last_update); + r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL, + last_start, prange->start + i, + pte_flags, + last_start - prange->start, + bo_adev->vm_manager.vram_base_offset, + NULL, dma_addr, &vm->last_update); for (j = last_start - prange->start; j <= i; j++) dma_addr[j] |= last_domain; -- 2.25.1 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-03-30 9:00 [PATCH] drm/amdgpu: fix TLB flushing during eviction Christian König @ 2022-03-30 20:51 ` philip yang 2022-03-31 6:27 ` Christian König 0 siblings, 1 reply; 9+ messages in thread From: philip yang @ 2022-03-30 20:51 UTC (permalink / raw) To: Christian König, amd-gfx, Philip.Yang, felix.kuehling Cc: Christian König [-- Attachment #1: Type: text/html, Size: 10851 bytes --] ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-03-30 20:51 ` philip yang @ 2022-03-31 6:27 ` Christian König 2022-03-31 14:37 ` Felix Kuehling 0 siblings, 1 reply; 9+ messages in thread From: Christian König @ 2022-03-31 6:27 UTC (permalink / raw) To: philip yang, amd-gfx, Philip.Yang, felix.kuehling; +Cc: Christian König Am 30.03.22 um 22:51 schrieb philip yang: > > > On 2022-03-30 05:00, Christian König wrote: >> Testing the valid bit is not enough to figure out if we >> need to invalidate the TLB or not. >> >> During eviction it is quite likely that we move a BO from VRAM to GTT and >> update the page tables immediately to the new GTT address. >> >> Rework the whole function to get all the necessary parameters directly as >> value. >> >> Signed-off-by: Christian König<christian.koenig@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 ++++++++++++++------------ >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++--- >> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++---- >> 3 files changed, 48 insertions(+), 48 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 9992a7311387..1cac90ee3318 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, >> } >> >> /** >> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table >> + * amdgpu_vm_update_range - update a range in the vm page table >> * >> - * @adev: amdgpu_device pointer of the VM >> - * @bo_adev: amdgpu_device pointer of the mapped BO >> - * @vm: requested vm >> + * @adev: amdgpu_device pointer to use for commands >> + * @vm: the VM to update the range >> * @immediate: immediate submission in a page fault >> * @unlocked: unlocked invalidation during MM callback >> + * @flush_tlb: trigger tlb invalidation after update completed >> * @resv: fences we need to sync to >> * 
@start: start of mapped range >> * @last: last mapped entry >> * @flags: flags for the entries >> * @offset: offset into nodes and pages_addr >> + * @vram_base: base for vram mappings >> * @res: ttm_resource to map >> * @pages_addr: DMA addresses to use for mapping >> * @fence: optional resulting fence >> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, >> * Fill in the page table entries between @start and @last. >> * >> * Returns: >> - * 0 for success, -EINVAL for failure. >> + * 0 for success, negative erro code for failure. >> */ >> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >> - struct amdgpu_device *bo_adev, >> - struct amdgpu_vm *vm, bool immediate, >> - bool unlocked, struct dma_resv *resv, >> - uint64_t start, uint64_t last, >> - uint64_t flags, uint64_t offset, >> - struct ttm_resource *res, >> - dma_addr_t *pages_addr, >> - struct dma_fence **fence) >> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, >> + bool immediate, bool unlocked, bool flush_tlb, >> + struct dma_resv *resv, uint64_t start, uint64_t last, >> + uint64_t flags, uint64_t offset, uint64_t vram_base, >> + struct ttm_resource *res, dma_addr_t *pages_addr, >> + struct dma_fence **fence) >> { >> struct amdgpu_vm_update_params params; >> struct amdgpu_vm_tlb_seq_cb *tlb_cb; >> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >> } >> >> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { >> - addr = bo_adev->vm_manager.vram_base_offset + >> - cursor.start; >> + addr = vram_base + cursor.start; >> } else { >> addr = 0; >> } >> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >> >> r = vm->update_funcs->commit(¶ms, fence); >> >> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) { >> + if (flush_tlb || params.table_freed) { > > All change look good to me, but when I look at previous changes again, > kfd_ioctl_map_memory_to_gpu is not correct now, 
as this should flush > TLB if (!kfd_flush_tlb_after_unmap(dev)). > That was intentionally dropped as Felix said it wouldn't be necessary any more with the TLB flush rework. Is that really causing any trouble? Christian. > To fix it, seems we need beow change, then pass flush_tlb flag via > kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte -> > amdgpu_vm_bo_update > > -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct > amdgpu_bo_va *bo_va, > bool clear) > > - bool flush_tlb = clear; > > +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct > amdgpu_bo_va *bo_va, > bool clear, bool flush_tlb) > > + flush_tlb |= clear; > > Regards, > > Philip > >> tlb_cb->vm = vm; >> if (!fence || !*fence || >> dma_fence_add_callback(*fence, &tlb_cb->cb, >> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >> dma_addr_t *pages_addr = NULL; >> struct ttm_resource *mem; >> struct dma_fence **last_update; >> + bool flush_tlb = clear; >> struct dma_resv *resv; >> + uint64_t vram_base; >> uint64_t flags; >> - struct amdgpu_device *bo_adev = adev; >> int r; >> >> if (clear || !bo) { >> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >> } >> >> if (bo) { >> + struct amdgpu_device *bo_adev = adev; >> + >> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); >> >> if (amdgpu_bo_encrypted(bo)) >> flags |= AMDGPU_PTE_TMZ; >> >> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); >> + vram_base = bo_adev->vm_manager.vram_base_offset; >> } else { >> flags = 0x0; >> + vram_base = 0; >> } >> >> if (clear || (bo && bo->tbo.base.resv == >> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >> last_update = &bo_va->last_pt_update; >> >> if (!clear && bo_va->base.moved) { >> - bo_va->base.moved = false; >> + flush_tlb = true; >> list_splice_init(&bo_va->valids, &bo_va->invalids); >> >> } else if (bo_va->cleared != clear) 
{ >> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >> >> trace_amdgpu_vm_bo_update(mapping); >> >> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, >> - resv, mapping->start, >> - mapping->last, update_flags, >> - mapping->offset, mem, >> - pages_addr, last_update); >> + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, >> + resv, mapping->start, mapping->last, >> + update_flags, mapping->offset, >> + vram_base, mem, pages_addr, >> + last_update); >> if (r) >> return r; >> } >> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >> >> list_splice_init(&bo_va->invalids, &bo_va->valids); >> bo_va->cleared = clear; >> + bo_va->base.moved = false; >> >> if (trace_amdgpu_vm_bo_mapping_enabled()) { >> list_for_each_entry(mapping, &bo_va->valids, list) >> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, >> mapping->start < AMDGPU_GMC_HOLE_START) >> init_pte_value = AMDGPU_PTE_DEFAULT_ATC; >> >> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, >> - resv, mapping->start, >> - mapping->last, init_pte_value, >> - 0, NULL, NULL, &f); >> + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, >> + mapping->start, mapping->last, >> + init_pte_value, 0, 0, NULL, NULL, >> + &f); >> amdgpu_vm_free_mapping(adev, vm, mapping, f); >> if (r) { >> dma_fence_put(f); >> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, >> goto error_unlock; >> } >> >> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, >> - addr, flags, value, NULL, NULL, NULL); >> + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, >> + addr, flags, value, 0, NULL, NULL, NULL); >> if (r) >> goto error_unlock; >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> index 6b7682fe84f8..1a814fbffff8 100644 >> 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, >> struct amdgpu_vm *vm); >> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, >> struct amdgpu_vm *vm, struct amdgpu_bo *bo); >> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >> - struct amdgpu_device *bo_adev, >> - struct amdgpu_vm *vm, bool immediate, >> - bool unlocked, struct dma_resv *resv, >> - uint64_t start, uint64_t last, >> - uint64_t flags, uint64_t offset, >> - struct ttm_resource *res, >> - dma_addr_t *pages_addr, >> - struct dma_fence **fence); >> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, >> + bool immediate, bool unlocked, bool flush_tlb, >> + struct dma_resv *resv, uint64_t start, uint64_t last, >> + uint64_t flags, uint64_t offset, uint64_t vram_base, >> + struct ttm_resource *res, dma_addr_t *pages_addr, >> + struct dma_fence **fence); >> int amdgpu_vm_bo_update(struct amdgpu_device *adev, >> struct amdgpu_bo_va *bo_va, >> bool clear); >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >> index 27533f6057e0..907b02045824 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, >> >> pr_debug("[0x%llx 0x%llx]\n", start, last); >> >> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL, >> - start, last, init_pte_value, 0, >> - NULL, NULL, fence); >> + return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start, >> + last, init_pte_value, 0, 0, NULL, NULL, >> + fence); >> } >> >> static int >> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, >> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 
1 : 0, >> pte_flags); >> >> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, >> - NULL, last_start, >> - prange->start + i, pte_flags, >> - last_start - prange->start, >> - NULL, dma_addr, >> - &vm->last_update); >> + r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL, >> + last_start, prange->start + i, >> + pte_flags, >> + last_start - prange->start, >> + bo_adev->vm_manager.vram_base_offset, >> + NULL, dma_addr, &vm->last_update); >> >> for (j = last_start - prange->start; j <= i; j++) >> dma_addr[j] |= last_domain; ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-03-31 6:27 ` Christian König @ 2022-03-31 14:37 ` Felix Kuehling 2022-03-31 15:42 ` philip yang ` (2 more replies) 0 siblings, 3 replies; 9+ messages in thread From: Felix Kuehling @ 2022-03-31 14:37 UTC (permalink / raw) To: Christian König, philip yang, amd-gfx, Philip.Yang Cc: Christian König Am 2022-03-31 um 02:27 schrieb Christian König: > Am 30.03.22 um 22:51 schrieb philip yang: >> >> >> On 2022-03-30 05:00, Christian König wrote: >>> Testing the valid bit is not enough to figure out if we >>> need to invalidate the TLB or not. >>> >>> During eviction it is quite likely that we move a BO from VRAM to >>> GTT and >>> update the page tables immediately to the new GTT address. >>> >>> Rework the whole function to get all the necessary parameters >>> directly as >>> value. >>> >>> Signed-off-by: Christian König<christian.koenig@amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 >>> ++++++++++++++------------ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++--- >>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++---- >>> 3 files changed, 48 insertions(+), 48 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> index 9992a7311387..1cac90ee3318 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct >>> dma_fence *fence, >>> } >>> /** >>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table >>> + * amdgpu_vm_update_range - update a range in the vm page table >>> * >>> - * @adev: amdgpu_device pointer of the VM >>> - * @bo_adev: amdgpu_device pointer of the mapped BO >>> - * @vm: requested vm >>> + * @adev: amdgpu_device pointer to use for commands >>> + * @vm: the VM to update the range >>> * @immediate: immediate submission in a page fault >>> * @unlocked: unlocked invalidation during MM 
callback >>> + * @flush_tlb: trigger tlb invalidation after update completed >>> * @resv: fences we need to sync to >>> * @start: start of mapped range >>> * @last: last mapped entry >>> * @flags: flags for the entries >>> * @offset: offset into nodes and pages_addr >>> + * @vram_base: base for vram mappings >>> * @res: ttm_resource to map >>> * @pages_addr: DMA addresses to use for mapping >>> * @fence: optional resulting fence >>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct >>> dma_fence *fence, >>> * Fill in the page table entries between @start and @last. >>> * >>> * Returns: >>> - * 0 for success, -EINVAL for failure. >>> + * 0 for success, negative erro code for failure. >>> */ >>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>> - struct amdgpu_device *bo_adev, >>> - struct amdgpu_vm *vm, bool immediate, >>> - bool unlocked, struct dma_resv *resv, >>> - uint64_t start, uint64_t last, >>> - uint64_t flags, uint64_t offset, >>> - struct ttm_resource *res, >>> - dma_addr_t *pages_addr, >>> - struct dma_fence **fence) >>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>> amdgpu_vm *vm, >>> + bool immediate, bool unlocked, bool flush_tlb, >>> + struct dma_resv *resv, uint64_t start, uint64_t last, >>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>> + struct dma_fence **fence) >>> { >>> struct amdgpu_vm_update_params params; >>> struct amdgpu_vm_tlb_seq_cb *tlb_cb; >>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct >>> amdgpu_device *adev, >>> } >>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { >>> - addr = bo_adev->vm_manager.vram_base_offset + >>> - cursor.start; >>> + addr = vram_base + cursor.start; >>> } else { >>> addr = 0; >>> } >>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct >>> amdgpu_device *adev, >>> r = vm->update_funcs->commit(¶ms, fence); >>> - if (!(flags & AMDGPU_PTE_VALID) || 
params.table_freed) { >>> + if (flush_tlb || params.table_freed) { >> >> All change look good to me, but when I look at previous changes >> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this should >> flush TLB if (!kfd_flush_tlb_after_unmap(dev)). >> > > That was intentionally dropped as Felix said it wouldn't be necessary > any more with the TLB flush rework. > > Is that really causing any trouble? I discussed it with Philip offline. The TLB flushing in kfd_ioctl_map_memory_to_gpu is still there, but it is no longer conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb checks the sequence number to find out if the flush is needed (presumably because we didn't flush after unmap). There is one case on Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache. In that case, we probably need to flush more frequently because a cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb would need to ignore the sequence number and heavy-weight flush TLB unconditionally in this case. Regards, Felix > > Christian. 
> >> To fix it, seems we need beow change, then pass flush_tlb flag via >> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte -> >> amdgpu_vm_bo_update >> >> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >> amdgpu_bo_va *bo_va, >> bool clear) >> >> - bool flush_tlb = clear; >> >> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >> amdgpu_bo_va *bo_va, >> bool clear, bool flush_tlb) >> >> + flush_tlb |= clear; >> >> Regards, >> >> Philip >> >>> tlb_cb->vm = vm; >>> if (!fence || !*fence || >>> dma_fence_add_callback(*fence, &tlb_cb->cb, >>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>> *adev, struct amdgpu_bo_va *bo_va, >>> dma_addr_t *pages_addr = NULL; >>> struct ttm_resource *mem; >>> struct dma_fence **last_update; >>> + bool flush_tlb = clear; >>> struct dma_resv *resv; >>> + uint64_t vram_base; >>> uint64_t flags; >>> - struct amdgpu_device *bo_adev = adev; >>> int r; >>> if (clear || !bo) { >>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>> *adev, struct amdgpu_bo_va *bo_va, >>> } >>> if (bo) { >>> + struct amdgpu_device *bo_adev = adev; >>> + >>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); >>> if (amdgpu_bo_encrypted(bo)) >>> flags |= AMDGPU_PTE_TMZ; >>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); >>> + vram_base = bo_adev->vm_manager.vram_base_offset; >>> } else { >>> flags = 0x0; >>> + vram_base = 0; >>> } >>> if (clear || (bo && bo->tbo.base.resv == >>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>> *adev, struct amdgpu_bo_va *bo_va, >>> last_update = &bo_va->last_pt_update; >>> if (!clear && bo_va->base.moved) { >>> - bo_va->base.moved = false; >>> + flush_tlb = true; >>> list_splice_init(&bo_va->valids, &bo_va->invalids); >>> } else if (bo_va->cleared != clear) { >>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>> *adev, struct amdgpu_bo_va *bo_va, >>> trace_amdgpu_vm_bo_update(mapping); >>> - r = 
amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, >>> false, >>> - resv, mapping->start, >>> - mapping->last, update_flags, >>> - mapping->offset, mem, >>> - pages_addr, last_update); >>> + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, >>> + resv, mapping->start, mapping->last, >>> + update_flags, mapping->offset, >>> + vram_base, mem, pages_addr, >>> + last_update); >>> if (r) >>> return r; >>> } >>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>> *adev, struct amdgpu_bo_va *bo_va, >>> list_splice_init(&bo_va->invalids, &bo_va->valids); >>> bo_va->cleared = clear; >>> + bo_va->base.moved = false; >>> if (trace_amdgpu_vm_bo_mapping_enabled()) { >>> list_for_each_entry(mapping, &bo_va->valids, list) >>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct >>> amdgpu_device *adev, >>> mapping->start < AMDGPU_GMC_HOLE_START) >>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC; >>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>> false, >>> - resv, mapping->start, >>> - mapping->last, init_pte_value, >>> - 0, NULL, NULL, &f); >>> + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, >>> + mapping->start, mapping->last, >>> + init_pte_value, 0, 0, NULL, NULL, >>> + &f); >>> amdgpu_vm_free_mapping(adev, vm, mapping, f); >>> if (r) { >>> dma_fence_put(f); >>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct >>> amdgpu_device *adev, u32 pasid, >>> goto error_unlock; >>> } >>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, >>> NULL, addr, >>> - addr, flags, value, NULL, NULL, NULL); >>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, >>> addr, >>> + addr, flags, value, 0, NULL, NULL, NULL); >>> if (r) >>> goto error_unlock; >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> index 6b7682fe84f8..1a814fbffff8 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>> @@ 
-402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct >>> amdgpu_device *adev, >>> struct amdgpu_vm *vm); >>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, >>> struct amdgpu_vm *vm, struct amdgpu_bo *bo); >>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>> - struct amdgpu_device *bo_adev, >>> - struct amdgpu_vm *vm, bool immediate, >>> - bool unlocked, struct dma_resv *resv, >>> - uint64_t start, uint64_t last, >>> - uint64_t flags, uint64_t offset, >>> - struct ttm_resource *res, >>> - dma_addr_t *pages_addr, >>> - struct dma_fence **fence); >>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>> amdgpu_vm *vm, >>> + bool immediate, bool unlocked, bool flush_tlb, >>> + struct dma_resv *resv, uint64_t start, uint64_t last, >>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>> + struct dma_fence **fence); >>> int amdgpu_vm_bo_update(struct amdgpu_device *adev, >>> struct amdgpu_bo_va *bo_va, >>> bool clear); >>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>> index 27533f6057e0..907b02045824 100644 >>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device >>> *adev, struct amdgpu_vm *vm, >>> pr_debug("[0x%llx 0x%llx]\n", start, last); >>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>> true, NULL, >>> - start, last, init_pte_value, 0, >>> - NULL, NULL, fence); >>> + return amdgpu_vm_update_range(adev, vm, false, true, true, >>> NULL, start, >>> + last, init_pte_value, 0, 0, NULL, NULL, >>> + fence); >>> } >>> static int >>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct >>> kfd_process_device *pdd, struct svm_range *prange, >>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 
1 : 0, >>> pte_flags); >>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, >>> false, >>> - NULL, last_start, >>> - prange->start + i, pte_flags, >>> - last_start - prange->start, >>> - NULL, dma_addr, >>> - &vm->last_update); >>> + r = amdgpu_vm_update_range(adev, vm, false, false, false, >>> NULL, >>> + last_start, prange->start + i, >>> + pte_flags, >>> + last_start - prange->start, >>> + bo_adev->vm_manager.vram_base_offset, >>> + NULL, dma_addr, &vm->last_update); >>> for (j = last_start - prange->start; j <= i; j++) >>> dma_addr[j] |= last_domain; > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-03-31 14:37 ` Felix Kuehling @ 2022-03-31 15:42 ` philip yang 2022-03-31 15:46 ` philip yang 2022-04-01 8:24 ` Christian König 2 siblings, 0 replies; 9+ messages in thread From: philip yang @ 2022-03-31 15:42 UTC (permalink / raw) To: Felix Kuehling, Christian König, amd-gfx, Philip.Yang Cc: Christian König [-- Attachment #1: Type: text/html, Size: 30125 bytes --] ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-03-31 14:37 ` Felix Kuehling 2022-03-31 15:42 ` philip yang @ 2022-03-31 15:46 ` philip yang 2022-04-01 8:24 ` Christian König 2 siblings, 0 replies; 9+ messages in thread From: philip yang @ 2022-03-31 15:46 UTC (permalink / raw) To: Felix Kuehling, Christian König, amd-gfx, Philip.Yang Cc: Christian König [-- Attachment #1: Type: text/html, Size: 29695 bytes --] ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-03-31 14:37 ` Felix Kuehling 2022-03-31 15:42 ` philip yang 2022-03-31 15:46 ` philip yang @ 2022-04-01 8:24 ` Christian König 2022-04-01 19:47 ` Felix Kuehling 2 siblings, 1 reply; 9+ messages in thread From: Christian König @ 2022-04-01 8:24 UTC (permalink / raw) To: Felix Kuehling, philip yang, amd-gfx, Philip.Yang; +Cc: Christian König Am 31.03.22 um 16:37 schrieb Felix Kuehling: > Am 2022-03-31 um 02:27 schrieb Christian König: >> Am 30.03.22 um 22:51 schrieb philip yang: >>> >>> >>> On 2022-03-30 05:00, Christian König wrote: >>>> Testing the valid bit is not enough to figure out if we >>>> need to invalidate the TLB or not. >>>> >>>> During eviction it is quite likely that we move a BO from VRAM to >>>> GTT and >>>> update the page tables immediately to the new GTT address. >>>> >>>> Rework the whole function to get all the necessary parameters >>>> directly as >>>> value. >>>> >>>> Signed-off-by: Christian König<christian.koenig@amd.com> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 >>>> ++++++++++++++------------ >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++--- >>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++---- >>>> 3 files changed, 48 insertions(+), 48 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> index 9992a7311387..1cac90ee3318 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct >>>> dma_fence *fence, >>>> } >>>> /** >>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page >>>> table >>>> + * amdgpu_vm_update_range - update a range in the vm page table >>>> * >>>> - * @adev: amdgpu_device pointer of the VM >>>> - * @bo_adev: amdgpu_device pointer of the mapped BO >>>> - * @vm: requested vm >>>> + * @adev: amdgpu_device pointer to use for commands >>>> + * 
@vm: the VM to update the range >>>> * @immediate: immediate submission in a page fault >>>> * @unlocked: unlocked invalidation during MM callback >>>> + * @flush_tlb: trigger tlb invalidation after update completed >>>> * @resv: fences we need to sync to >>>> * @start: start of mapped range >>>> * @last: last mapped entry >>>> * @flags: flags for the entries >>>> * @offset: offset into nodes and pages_addr >>>> + * @vram_base: base for vram mappings >>>> * @res: ttm_resource to map >>>> * @pages_addr: DMA addresses to use for mapping >>>> * @fence: optional resulting fence >>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct >>>> dma_fence *fence, >>>> * Fill in the page table entries between @start and @last. >>>> * >>>> * Returns: >>>> - * 0 for success, -EINVAL for failure. >>>> + * 0 for success, negative erro code for failure. >>>> */ >>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>>> - struct amdgpu_device *bo_adev, >>>> - struct amdgpu_vm *vm, bool immediate, >>>> - bool unlocked, struct dma_resv *resv, >>>> - uint64_t start, uint64_t last, >>>> - uint64_t flags, uint64_t offset, >>>> - struct ttm_resource *res, >>>> - dma_addr_t *pages_addr, >>>> - struct dma_fence **fence) >>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>>> amdgpu_vm *vm, >>>> + bool immediate, bool unlocked, bool flush_tlb, >>>> + struct dma_resv *resv, uint64_t start, uint64_t last, >>>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>>> + struct dma_fence **fence) >>>> { >>>> struct amdgpu_vm_update_params params; >>>> struct amdgpu_vm_tlb_seq_cb *tlb_cb; >>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> } >>>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { >>>> - addr = bo_adev->vm_manager.vram_base_offset + >>>> - cursor.start; >>>> + addr = vram_base + cursor.start; >>>> } else { >>>> addr = 0; >>>> } >>>> @@ 
-927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct >>>> amdgpu_device *adev, >>>> r = vm->update_funcs->commit(¶ms, fence); >>>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) { >>>> + if (flush_tlb || params.table_freed) { >>> >>> All change look good to me, but when I look at previous changes >>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this >>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)). >>> >> >> That was intentionally dropped as Felix said it wouldn't be necessary >> any more with the TLB flush rework. >> >> Is that really causing any trouble? > > I discussed it with Philip offline. The TLB flushing in > kfd_ioctl_map_memory_to_gpu is still there, but it is no longer > conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb > checks the sequence number to find out if the flush is needed > (presumably because we didn't flush after unmap). > > There is one case on Vega20+XGMI where PTEs get inadvertently cached > in L2 texture cache. Ah, that one. Yeah I do remember that issue. > In that case, we probably need to flush more frequently because a > cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb > would need to ignore the sequence number and heavy-weight flush TLB > unconditionally in this case. Ok, but I think that is outside of the scope of the VM handling then. Or should we somehow handle that in the VM code as well? I mean incrementing the sequence when the involved BO is mapped through XGMI is trivial. I just can't easily signal that we need a heavy-weight flush. Thanks, Christian. > > Regards, > Felix > >> >> Christian. 
>> >>> To fix it, seems we need beow change, then pass flush_tlb flag via >>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte >>> -> amdgpu_vm_bo_update >>> >>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >>> amdgpu_bo_va *bo_va, >>> bool clear) >>> >>> - bool flush_tlb = clear; >>> >>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >>> amdgpu_bo_va *bo_va, >>> bool clear, bool flush_tlb) >>> >>> + flush_tlb |= clear; >>> >>> Regards, >>> >>> Philip >>> >>>> tlb_cb->vm = vm; >>>> if (!fence || !*fence || >>>> dma_fence_add_callback(*fence, &tlb_cb->cb, >>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>>> *adev, struct amdgpu_bo_va *bo_va, >>>> dma_addr_t *pages_addr = NULL; >>>> struct ttm_resource *mem; >>>> struct dma_fence **last_update; >>>> + bool flush_tlb = clear; >>>> struct dma_resv *resv; >>>> + uint64_t vram_base; >>>> uint64_t flags; >>>> - struct amdgpu_device *bo_adev = adev; >>>> int r; >>>> if (clear || !bo) { >>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct >>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>> } >>>> if (bo) { >>>> + struct amdgpu_device *bo_adev = adev; >>>> + >>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); >>>> if (amdgpu_bo_encrypted(bo)) >>>> flags |= AMDGPU_PTE_TMZ; >>>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); >>>> + vram_base = bo_adev->vm_manager.vram_base_offset; >>>> } else { >>>> flags = 0x0; >>>> + vram_base = 0; >>>> } >>>> if (clear || (bo && bo->tbo.base.resv == >>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>>> *adev, struct amdgpu_bo_va *bo_va, >>>> last_update = &bo_va->last_pt_update; >>>> if (!clear && bo_va->base.moved) { >>>> - bo_va->base.moved = false; >>>> + flush_tlb = true; >>>> list_splice_init(&bo_va->valids, &bo_va->invalids); >>>> } else if (bo_va->cleared != clear) { >>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct >>>> amdgpu_device *adev, struct amdgpu_bo_va 
*bo_va, >>>> trace_amdgpu_vm_bo_update(mapping); >>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, >>>> false, false, >>>> - resv, mapping->start, >>>> - mapping->last, update_flags, >>>> - mapping->offset, mem, >>>> - pages_addr, last_update); >>>> + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, >>>> + resv, mapping->start, mapping->last, >>>> + update_flags, mapping->offset, >>>> + vram_base, mem, pages_addr, >>>> + last_update); >>>> if (r) >>>> return r; >>>> } >>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>>> *adev, struct amdgpu_bo_va *bo_va, >>>> list_splice_init(&bo_va->invalids, &bo_va->valids); >>>> bo_va->cleared = clear; >>>> + bo_va->base.moved = false; >>>> if (trace_amdgpu_vm_bo_mapping_enabled()) { >>>> list_for_each_entry(mapping, &bo_va->valids, list) >>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct >>>> amdgpu_device *adev, >>>> mapping->start < AMDGPU_GMC_HOLE_START) >>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC; >>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>>> false, >>>> - resv, mapping->start, >>>> - mapping->last, init_pte_value, >>>> - 0, NULL, NULL, &f); >>>> + r = amdgpu_vm_update_range(adev, vm, false, false, true, >>>> resv, >>>> + mapping->start, mapping->last, >>>> + init_pte_value, 0, 0, NULL, NULL, >>>> + &f); >>>> amdgpu_vm_free_mapping(adev, vm, mapping, f); >>>> if (r) { >>>> dma_fence_put(f); >>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct >>>> amdgpu_device *adev, u32 pasid, >>>> goto error_unlock; >>>> } >>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, >>>> NULL, addr, >>>> - addr, flags, value, NULL, NULL, NULL); >>>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, >>>> addr, >>>> + addr, flags, value, 0, NULL, NULL, NULL); >>>> if (r) >>>> goto error_unlock; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> index 6b7682fe84f8..1a814fbffff8 
100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct >>>> amdgpu_device *adev, >>>> struct amdgpu_vm *vm); >>>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, >>>> struct amdgpu_vm *vm, struct amdgpu_bo *bo); >>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>>> - struct amdgpu_device *bo_adev, >>>> - struct amdgpu_vm *vm, bool immediate, >>>> - bool unlocked, struct dma_resv *resv, >>>> - uint64_t start, uint64_t last, >>>> - uint64_t flags, uint64_t offset, >>>> - struct ttm_resource *res, >>>> - dma_addr_t *pages_addr, >>>> - struct dma_fence **fence); >>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>>> amdgpu_vm *vm, >>>> + bool immediate, bool unlocked, bool flush_tlb, >>>> + struct dma_resv *resv, uint64_t start, uint64_t last, >>>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>>> + struct dma_fence **fence); >>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev, >>>> struct amdgpu_bo_va *bo_va, >>>> bool clear); >>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> index 27533f6057e0..907b02045824 100644 >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device >>>> *adev, struct amdgpu_vm *vm, >>>> pr_debug("[0x%llx 0x%llx]\n", start, last); >>>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>>> true, NULL, >>>> - start, last, init_pte_value, 0, >>>> - NULL, NULL, fence); >>>> + return amdgpu_vm_update_range(adev, vm, false, true, true, >>>> NULL, start, >>>> + last, init_pte_value, 0, 0, NULL, NULL, >>>> + fence); >>>> } >>>> static int >>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct >>>> kfd_process_device *pdd, struct svm_range *prange, >>>> 
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, >>>> pte_flags); >>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, >>>> false, false, >>>> - NULL, last_start, >>>> - prange->start + i, pte_flags, >>>> - last_start - prange->start, >>>> - NULL, dma_addr, >>>> - &vm->last_update); >>>> + r = amdgpu_vm_update_range(adev, vm, false, false, false, >>>> NULL, >>>> + last_start, prange->start + i, >>>> + pte_flags, >>>> + last_start - prange->start, >>>> + bo_adev->vm_manager.vram_base_offset, >>>> + NULL, dma_addr, &vm->last_update); >>>> for (j = last_start - prange->start; j <= i; j++) >>>> dma_addr[j] |= last_domain; >> ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-04-01 8:24 ` Christian König @ 2022-04-01 19:47 ` Felix Kuehling 2022-04-03 15:09 ` Christian König 0 siblings, 1 reply; 9+ messages in thread From: Felix Kuehling @ 2022-04-01 19:47 UTC (permalink / raw) To: Christian König, philip yang, amd-gfx, Philip.Yang Cc: Christian König On 2022-04-01 04:24, Christian König wrote: > Am 31.03.22 um 16:37 schrieb Felix Kuehling: >> Am 2022-03-31 um 02:27 schrieb Christian König: >>> Am 30.03.22 um 22:51 schrieb philip yang: >>>> >>>> >>>> On 2022-03-30 05:00, Christian König wrote: >>>>> Testing the valid bit is not enough to figure out if we >>>>> need to invalidate the TLB or not. >>>>> >>>>> During eviction it is quite likely that we move a BO from VRAM to >>>>> GTT and >>>>> update the page tables immediately to the new GTT address. >>>>> >>>>> Rework the whole function to get all the necessary parameters >>>>> directly as >>>>> value. >>>>> >>>>> Signed-off-by: Christian König<christian.koenig@amd.com> >>>>> --- >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 >>>>> ++++++++++++++------------ >>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++--- >>>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++---- >>>>> 3 files changed, 48 insertions(+), 48 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>> index 9992a7311387..1cac90ee3318 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct >>>>> dma_fence *fence, >>>>> } >>>>> /** >>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page >>>>> table >>>>> + * amdgpu_vm_update_range - update a range in the vm page table >>>>> * >>>>> - * @adev: amdgpu_device pointer of the VM >>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO >>>>> - * @vm: requested vm >>>>> + * @adev: amdgpu_device pointer to 
use for commands >>>>> + * @vm: the VM to update the range >>>>> * @immediate: immediate submission in a page fault >>>>> * @unlocked: unlocked invalidation during MM callback >>>>> + * @flush_tlb: trigger tlb invalidation after update completed >>>>> * @resv: fences we need to sync to >>>>> * @start: start of mapped range >>>>> * @last: last mapped entry >>>>> * @flags: flags for the entries >>>>> * @offset: offset into nodes and pages_addr >>>>> + * @vram_base: base for vram mappings >>>>> * @res: ttm_resource to map >>>>> * @pages_addr: DMA addresses to use for mapping >>>>> * @fence: optional resulting fence >>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct >>>>> dma_fence *fence, >>>>> * Fill in the page table entries between @start and @last. >>>>> * >>>>> * Returns: >>>>> - * 0 for success, -EINVAL for failure. >>>>> + * 0 for success, negative erro code for failure. >>>>> */ >>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>>>> - struct amdgpu_device *bo_adev, >>>>> - struct amdgpu_vm *vm, bool immediate, >>>>> - bool unlocked, struct dma_resv *resv, >>>>> - uint64_t start, uint64_t last, >>>>> - uint64_t flags, uint64_t offset, >>>>> - struct ttm_resource *res, >>>>> - dma_addr_t *pages_addr, >>>>> - struct dma_fence **fence) >>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>>>> amdgpu_vm *vm, >>>>> + bool immediate, bool unlocked, bool flush_tlb, >>>>> + struct dma_resv *resv, uint64_t start, uint64_t last, >>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>>>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>>>> + struct dma_fence **fence) >>>>> { >>>>> struct amdgpu_vm_update_params params; >>>>> struct amdgpu_vm_tlb_seq_cb *tlb_cb; >>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct >>>>> amdgpu_device *adev, >>>>> } >>>>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { >>>>> - addr = bo_adev->vm_manager.vram_base_offset + >>>>> - cursor.start; >>>>> + 
addr = vram_base + cursor.start; >>>>> } else { >>>>> addr = 0; >>>>> } >>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct >>>>> amdgpu_device *adev, >>>>> r = vm->update_funcs->commit(¶ms, fence); >>>>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) { >>>>> + if (flush_tlb || params.table_freed) { >>>> >>>> All change look good to me, but when I look at previous changes >>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this >>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)). >>>> >>> >>> That was intentionally dropped as Felix said it wouldn't be >>> necessary any more with the TLB flush rework. >>> >>> Is that really causing any trouble? >> >> I discussed it with Philip offline. The TLB flushing in >> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer >> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb >> checks the sequence number to find out if the flush is needed >> (presumably because we didn't flush after unmap). >> >> There is one case on Vega20+XGMI where PTEs get inadvertently cached >> in L2 texture cache. > > Ah, that one. Yeah I do remember that issue. > >> In that case, we probably need to flush more frequently because a >> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb >> would need to ignore the sequence number and heavy-weight flush TLB >> unconditionally in this case. > > Ok, but I think that is outside of the scope of the VM handling then. > Or should we somehow handle that in the VM code as well? I think it would make sense to apply the workaround in the VM code now. You'd need to do this on all mappings on Vega20+XGMI. It doesn't matter whether the mapping itself involves XGMI. AIUI, the incorrect caching happens for all mappings when the XGMI bridge is physically present. > > I mean incrementing the sequence when the involved BO is mapped > through XGMI is trivial. I just can't easily signal that we need a > heavy-weight flush. 
There is already code in gmc_v9_0.c to force heavy-weight flushing, and doing an double flush to make sure the TLB is flushed after the L2 texture cache. grep -A4 "Vega20+XGMI" gmc_v9_0.c for details. Regards, Felix > > Thanks, > Christian. > >> >> Regards, >> Felix >> >>> >>> Christian. >>> >>>> To fix it, seems we need beow change, then pass flush_tlb flag via >>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte >>>> -> amdgpu_vm_bo_update >>>> >>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >>>> amdgpu_bo_va *bo_va, >>>> bool clear) >>>> >>>> - bool flush_tlb = clear; >>>> >>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >>>> amdgpu_bo_va *bo_va, >>>> bool clear, bool flush_tlb) >>>> >>>> + flush_tlb |= clear; >>>> >>>> Regards, >>>> >>>> Philip >>>> >>>>> tlb_cb->vm = vm; >>>>> if (!fence || !*fence || >>>>> dma_fence_add_callback(*fence, &tlb_cb->cb, >>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct >>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>> dma_addr_t *pages_addr = NULL; >>>>> struct ttm_resource *mem; >>>>> struct dma_fence **last_update; >>>>> + bool flush_tlb = clear; >>>>> struct dma_resv *resv; >>>>> + uint64_t vram_base; >>>>> uint64_t flags; >>>>> - struct amdgpu_device *bo_adev = adev; >>>>> int r; >>>>> if (clear || !bo) { >>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct >>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>> } >>>>> if (bo) { >>>>> + struct amdgpu_device *bo_adev = adev; >>>>> + >>>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); >>>>> if (amdgpu_bo_encrypted(bo)) >>>>> flags |= AMDGPU_PTE_TMZ; >>>>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); >>>>> + vram_base = bo_adev->vm_manager.vram_base_offset; >>>>> } else { >>>>> flags = 0x0; >>>>> + vram_base = 0; >>>>> } >>>>> if (clear || (bo && bo->tbo.base.resv == >>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>>>> *adev, struct amdgpu_bo_va *bo_va, >>>>> 
last_update = &bo_va->last_pt_update; >>>>> if (!clear && bo_va->base.moved) { >>>>> - bo_va->base.moved = false; >>>>> + flush_tlb = true; >>>>> list_splice_init(&bo_va->valids, &bo_va->invalids); >>>>> } else if (bo_va->cleared != clear) { >>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct >>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>> trace_amdgpu_vm_bo_update(mapping); >>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, >>>>> false, false, >>>>> - resv, mapping->start, >>>>> - mapping->last, update_flags, >>>>> - mapping->offset, mem, >>>>> - pages_addr, last_update); >>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, >>>>> flush_tlb, >>>>> + resv, mapping->start, mapping->last, >>>>> + update_flags, mapping->offset, >>>>> + vram_base, mem, pages_addr, >>>>> + last_update); >>>>> if (r) >>>>> return r; >>>>> } >>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device >>>>> *adev, struct amdgpu_bo_va *bo_va, >>>>> list_splice_init(&bo_va->invalids, &bo_va->valids); >>>>> bo_va->cleared = clear; >>>>> + bo_va->base.moved = false; >>>>> if (trace_amdgpu_vm_bo_mapping_enabled()) { >>>>> list_for_each_entry(mapping, &bo_va->valids, list) >>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct >>>>> amdgpu_device *adev, >>>>> mapping->start < AMDGPU_GMC_HOLE_START) >>>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC; >>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>>>> false, >>>>> - resv, mapping->start, >>>>> - mapping->last, init_pte_value, >>>>> - 0, NULL, NULL, &f); >>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, true, >>>>> resv, >>>>> + mapping->start, mapping->last, >>>>> + init_pte_value, 0, 0, NULL, NULL, >>>>> + &f); >>>>> amdgpu_vm_free_mapping(adev, vm, mapping, f); >>>>> if (r) { >>>>> dma_fence_put(f); >>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct >>>>> amdgpu_device *adev, u32 pasid, >>>>> goto error_unlock; >>>>> } >>>>> - r = 
amdgpu_vm_bo_update_mapping(adev, adev, vm, true, >>>>> false, NULL, addr, >>>>> - addr, flags, value, NULL, NULL, NULL); >>>>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, >>>>> NULL, addr, >>>>> + addr, flags, value, 0, NULL, NULL, NULL); >>>>> if (r) >>>>> goto error_unlock; >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>> index 6b7682fe84f8..1a814fbffff8 100644 >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct >>>>> amdgpu_device *adev, >>>>> struct amdgpu_vm *vm); >>>>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, >>>>> struct amdgpu_vm *vm, struct amdgpu_bo *bo); >>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>>>> - struct amdgpu_device *bo_adev, >>>>> - struct amdgpu_vm *vm, bool immediate, >>>>> - bool unlocked, struct dma_resv *resv, >>>>> - uint64_t start, uint64_t last, >>>>> - uint64_t flags, uint64_t offset, >>>>> - struct ttm_resource *res, >>>>> - dma_addr_t *pages_addr, >>>>> - struct dma_fence **fence); >>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>>>> amdgpu_vm *vm, >>>>> + bool immediate, bool unlocked, bool flush_tlb, >>>>> + struct dma_resv *resv, uint64_t start, uint64_t last, >>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>>>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>>>> + struct dma_fence **fence); >>>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev, >>>>> struct amdgpu_bo_va *bo_va, >>>>> bool clear); >>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>> index 27533f6057e0..907b02045824 100644 >>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct >>>>> amdgpu_device *adev, struct amdgpu_vm *vm, >>>>> 
pr_debug("[0x%llx 0x%llx]\n", start, last); >>>>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>>>> true, NULL, >>>>> - start, last, init_pte_value, 0, >>>>> - NULL, NULL, fence); >>>>> + return amdgpu_vm_update_range(adev, vm, false, true, true, >>>>> NULL, start, >>>>> + last, init_pte_value, 0, 0, NULL, NULL, >>>>> + fence); >>>>> } >>>>> static int >>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct >>>>> kfd_process_device *pdd, struct svm_range *prange, >>>>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, >>>>> pte_flags); >>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, >>>>> false, false, >>>>> - NULL, last_start, >>>>> - prange->start + i, pte_flags, >>>>> - last_start - prange->start, >>>>> - NULL, dma_addr, >>>>> - &vm->last_update); >>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, false, >>>>> NULL, >>>>> + last_start, prange->start + i, >>>>> + pte_flags, >>>>> + last_start - prange->start, >>>>> + bo_adev->vm_manager.vram_base_offset, >>>>> + NULL, dma_addr, &vm->last_update); >>>>> for (j = last_start - prange->start; j <= i; j++) >>>>> dma_addr[j] |= last_domain; >>> > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction 2022-04-01 19:47 ` Felix Kuehling @ 2022-04-03 15:09 ` Christian König 0 siblings, 0 replies; 9+ messages in thread From: Christian König @ 2022-04-03 15:09 UTC (permalink / raw) To: Felix Kuehling, Christian König, philip yang, amd-gfx, Philip.Yang Am 01.04.22 um 21:47 schrieb Felix Kuehling: > > On 2022-04-01 04:24, Christian König wrote: >> Am 31.03.22 um 16:37 schrieb Felix Kuehling: >>> Am 2022-03-31 um 02:27 schrieb Christian König: >>>> Am 30.03.22 um 22:51 schrieb philip yang: >>>>> >>>>> >>>>> On 2022-03-30 05:00, Christian König wrote: >>>>>> Testing the valid bit is not enough to figure out if we >>>>>> need to invalidate the TLB or not. >>>>>> >>>>>> During eviction it is quite likely that we move a BO from VRAM to >>>>>> GTT and >>>>>> update the page tables immediately to the new GTT address. >>>>>> >>>>>> Rework the whole function to get all the necessary parameters >>>>>> directly as >>>>>> value. >>>>>> >>>>>> Signed-off-by: Christian König<christian.koenig@amd.com> >>>>>> --- >>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 >>>>>> ++++++++++++++------------ >>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++--- >>>>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++---- >>>>>> 3 files changed, 48 insertions(+), 48 deletions(-) >>>>>> >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>>> index 9992a7311387..1cac90ee3318 100644 >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct >>>>>> dma_fence *fence, >>>>>> } >>>>>> /** >>>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page >>>>>> table >>>>>> + * amdgpu_vm_update_range - update a range in the vm page table >>>>>> * >>>>>> - * @adev: amdgpu_device pointer of the VM >>>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO >>>>>> - * @vm: 
requested vm >>>>>> + * @adev: amdgpu_device pointer to use for commands >>>>>> + * @vm: the VM to update the range >>>>>> * @immediate: immediate submission in a page fault >>>>>> * @unlocked: unlocked invalidation during MM callback >>>>>> + * @flush_tlb: trigger tlb invalidation after update completed >>>>>> * @resv: fences we need to sync to >>>>>> * @start: start of mapped range >>>>>> * @last: last mapped entry >>>>>> * @flags: flags for the entries >>>>>> * @offset: offset into nodes and pages_addr >>>>>> + * @vram_base: base for vram mappings >>>>>> * @res: ttm_resource to map >>>>>> * @pages_addr: DMA addresses to use for mapping >>>>>> * @fence: optional resulting fence >>>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct >>>>>> dma_fence *fence, >>>>>> * Fill in the page table entries between @start and @last. >>>>>> * >>>>>> * Returns: >>>>>> - * 0 for success, -EINVAL for failure. >>>>>> + * 0 for success, negative erro code for failure. >>>>>> */ >>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>>>>> - struct amdgpu_device *bo_adev, >>>>>> - struct amdgpu_vm *vm, bool immediate, >>>>>> - bool unlocked, struct dma_resv *resv, >>>>>> - uint64_t start, uint64_t last, >>>>>> - uint64_t flags, uint64_t offset, >>>>>> - struct ttm_resource *res, >>>>>> - dma_addr_t *pages_addr, >>>>>> - struct dma_fence **fence) >>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>>>>> amdgpu_vm *vm, >>>>>> + bool immediate, bool unlocked, bool flush_tlb, >>>>>> + struct dma_resv *resv, uint64_t start, uint64_t >>>>>> last, >>>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>>>>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>>>>> + struct dma_fence **fence) >>>>>> { >>>>>> struct amdgpu_vm_update_params params; >>>>>> struct amdgpu_vm_tlb_seq_cb *tlb_cb; >>>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct >>>>>> amdgpu_device *adev, >>>>>> } >>>>>> } else if (flags & (AMDGPU_PTE_VALID | 
>>>>>> AMDGPU_PTE_PRT)) { >>>>>> - addr = bo_adev->vm_manager.vram_base_offset + >>>>>> - cursor.start; >>>>>> + addr = vram_base + cursor.start; >>>>>> } else { >>>>>> addr = 0; >>>>>> } >>>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct >>>>>> amdgpu_device *adev, >>>>>> r = vm->update_funcs->commit(¶ms, fence); >>>>>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) { >>>>>> + if (flush_tlb || params.table_freed) { >>>>> >>>>> All change look good to me, but when I look at previous changes >>>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this >>>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)). >>>>> >>>> >>>> That was intentionally dropped as Felix said it wouldn't be >>>> necessary any more with the TLB flush rework. >>>> >>>> Is that really causing any trouble? >>> >>> I discussed it with Philip offline. The TLB flushing in >>> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer >>> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb >>> checks the sequence number to find out if the flush is needed >>> (presumably because we didn't flush after unmap). >>> >>> There is one case on Vega20+XGMI where PTEs get inadvertently cached >>> in L2 texture cache. >> >> Ah, that one. Yeah I do remember that issue. >> >>> In that case, we probably need to flush more frequently because a >>> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb >>> would need to ignore the sequence number and heavy-weight flush TLB >>> unconditionally in this case. >> >> Ok, but I think that is outside of the scope of the VM handling then. >> Or should we somehow handle that in the VM code as well? > > I think it would make sense to apply the workaround in the VM code > now. You'd need to do this on all mappings on Vega20+XGMI. It doesn't > matter whether the mapping itself involves XGMI. AIUI, the incorrect > caching happens for all mappings when the XGMI bridge is physically > present. Good point! 
Looks like Philip already send a patch for this, going to review that one then. Thanks, Christian. > > >> >> I mean incrementing the sequence when the involved BO is mapped >> through XGMI is trivial. I just can't easily signal that we need a >> heavy-weight flush. > > There is already code in gmc_v9_0.c to force heavy-weight flushing, > and doing an double flush to make sure the TLB is flushed after the L2 > texture cache. grep -A4 "Vega20+XGMI" gmc_v9_0.c for details. > > Regards, > Felix > > >> >> Thanks, >> Christian. >> >>> >>> Regards, >>> Felix >>> >>>> >>>> Christian. >>>> >>>>> To fix it, seems we need beow change, then pass flush_tlb flag via >>>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte >>>>> -> amdgpu_vm_bo_update >>>>> >>>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >>>>> amdgpu_bo_va *bo_va, >>>>> bool clear) >>>>> >>>>> - bool flush_tlb = clear; >>>>> >>>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct >>>>> amdgpu_bo_va *bo_va, >>>>> bool clear, bool flush_tlb) >>>>> >>>>> + flush_tlb |= clear; >>>>> >>>>> Regards, >>>>> >>>>> Philip >>>>> >>>>>> tlb_cb->vm = vm; >>>>>> if (!fence || !*fence || >>>>>> dma_fence_add_callback(*fence, &tlb_cb->cb, >>>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct >>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>>> dma_addr_t *pages_addr = NULL; >>>>>> struct ttm_resource *mem; >>>>>> struct dma_fence **last_update; >>>>>> + bool flush_tlb = clear; >>>>>> struct dma_resv *resv; >>>>>> + uint64_t vram_base; >>>>>> uint64_t flags; >>>>>> - struct amdgpu_device *bo_adev = adev; >>>>>> int r; >>>>>> if (clear || !bo) { >>>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct >>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>>> } >>>>>> if (bo) { >>>>>> + struct amdgpu_device *bo_adev = adev; >>>>>> + >>>>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); >>>>>> if (amdgpu_bo_encrypted(bo)) >>>>>> flags |= AMDGPU_PTE_TMZ; 
>>>>>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); >>>>>> + vram_base = bo_adev->vm_manager.vram_base_offset; >>>>>> } else { >>>>>> flags = 0x0; >>>>>> + vram_base = 0; >>>>>> } >>>>>> if (clear || (bo && bo->tbo.base.resv == >>>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct >>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>>> last_update = &bo_va->last_pt_update; >>>>>> if (!clear && bo_va->base.moved) { >>>>>> - bo_va->base.moved = false; >>>>>> + flush_tlb = true; >>>>>> list_splice_init(&bo_va->valids, &bo_va->invalids); >>>>>> } else if (bo_va->cleared != clear) { >>>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct >>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>>> trace_amdgpu_vm_bo_update(mapping); >>>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, >>>>>> false, false, >>>>>> - resv, mapping->start, >>>>>> - mapping->last, update_flags, >>>>>> - mapping->offset, mem, >>>>>> - pages_addr, last_update); >>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, >>>>>> flush_tlb, >>>>>> + resv, mapping->start, mapping->last, >>>>>> + update_flags, mapping->offset, >>>>>> + vram_base, mem, pages_addr, >>>>>> + last_update); >>>>>> if (r) >>>>>> return r; >>>>>> } >>>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct >>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va, >>>>>> list_splice_init(&bo_va->invalids, &bo_va->valids); >>>>>> bo_va->cleared = clear; >>>>>> + bo_va->base.moved = false; >>>>>> if (trace_amdgpu_vm_bo_mapping_enabled()) { >>>>>> list_for_each_entry(mapping, &bo_va->valids, list) >>>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct >>>>>> amdgpu_device *adev, >>>>>> mapping->start < AMDGPU_GMC_HOLE_START) >>>>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC; >>>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>>>>> false, >>>>>> - resv, mapping->start, >>>>>> - mapping->last, init_pte_value, >>>>>> - 0, NULL, NULL, &f); >>>>>> + r = amdgpu_vm_update_range(adev, vm, 
false, false, true, >>>>>> resv, >>>>>> + mapping->start, mapping->last, >>>>>> + init_pte_value, 0, 0, NULL, NULL, >>>>>> + &f); >>>>>> amdgpu_vm_free_mapping(adev, vm, mapping, f); >>>>>> if (r) { >>>>>> dma_fence_put(f); >>>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct >>>>>> amdgpu_device *adev, u32 pasid, >>>>>> goto error_unlock; >>>>>> } >>>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, >>>>>> false, NULL, addr, >>>>>> - addr, flags, value, NULL, NULL, NULL); >>>>>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, >>>>>> NULL, addr, >>>>>> + addr, flags, value, 0, NULL, NULL, NULL); >>>>>> if (r) >>>>>> goto error_unlock; >>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>>> index 6b7682fe84f8..1a814fbffff8 100644 >>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >>>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct >>>>>> amdgpu_device *adev, >>>>>> struct amdgpu_vm *vm); >>>>>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, >>>>>> struct amdgpu_vm *vm, struct amdgpu_bo *bo); >>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, >>>>>> - struct amdgpu_device *bo_adev, >>>>>> - struct amdgpu_vm *vm, bool immediate, >>>>>> - bool unlocked, struct dma_resv *resv, >>>>>> - uint64_t start, uint64_t last, >>>>>> - uint64_t flags, uint64_t offset, >>>>>> - struct ttm_resource *res, >>>>>> - dma_addr_t *pages_addr, >>>>>> - struct dma_fence **fence); >>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct >>>>>> amdgpu_vm *vm, >>>>>> + bool immediate, bool unlocked, bool flush_tlb, >>>>>> + struct dma_resv *resv, uint64_t start, uint64_t >>>>>> last, >>>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base, >>>>>> + struct ttm_resource *res, dma_addr_t *pages_addr, >>>>>> + struct dma_fence **fence); >>>>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
>>>>>> struct amdgpu_bo_va *bo_va, >>>>>> bool clear); >>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>>> index 27533f6057e0..907b02045824 100644 >>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c >>>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct >>>>>> amdgpu_device *adev, struct amdgpu_vm *vm, >>>>>> pr_debug("[0x%llx 0x%llx]\n", start, last); >>>>>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, >>>>>> true, NULL, >>>>>> - start, last, init_pte_value, 0, >>>>>> - NULL, NULL, fence); >>>>>> + return amdgpu_vm_update_range(adev, vm, false, true, true, >>>>>> NULL, start, >>>>>> + last, init_pte_value, 0, 0, NULL, NULL, >>>>>> + fence); >>>>>> } >>>>>> static int >>>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct >>>>>> kfd_process_device *pdd, struct svm_range *prange, >>>>>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, >>>>>> pte_flags); >>>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, >>>>>> false, false, >>>>>> - NULL, last_start, >>>>>> - prange->start + i, pte_flags, >>>>>> - last_start - prange->start, >>>>>> - NULL, dma_addr, >>>>>> - &vm->last_update); >>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, >>>>>> false, NULL, >>>>>> + last_start, prange->start + i, >>>>>> + pte_flags, >>>>>> + last_start - prange->start, >>>>>> + bo_adev->vm_manager.vram_base_offset, >>>>>> + NULL, dma_addr, &vm->last_update); >>>>>> for (j = last_start - prange->start; j <= i; j++) >>>>>> dma_addr[j] |= last_domain; >>>> >> ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-04-03 15:09 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-03-30 9:00 [PATCH] drm/amdgpu: fix TLB flushing during eviction Christian König 2022-03-30 20:51 ` philip yang 2022-03-31 6:27 ` Christian König 2022-03-31 14:37 ` Felix Kuehling 2022-03-31 15:42 ` philip yang 2022-03-31 15:46 ` philip yang 2022-04-01 8:24 ` Christian König 2022-04-01 19:47 ` Felix Kuehling 2022-04-03 15:09 ` Christian König
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.