* [PATCH] drm/amdgpu: fix TLB flushing during eviction
@ 2022-03-30 9:00 Christian König
2022-03-30 20:51 ` philip yang
0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-03-30 9:00 UTC (permalink / raw)
To: amd-gfx, Philip.Yang, felix.kuehling; +Cc: Christian König
Testing the valid bit is not enough to figure out if we
need to invalidate the TLB or not.
During eviction it is quite likely that we move a BO from VRAM to GTT and
update the page tables immediately to the new GTT address.
Rework the whole function to get all the necessary parameters directly as
value.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 ++++++++++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++----
3 files changed, 48 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9992a7311387..1cac90ee3318 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
}
/**
- * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
+ * amdgpu_vm_update_range - update a range in the vm page table
*
- * @adev: amdgpu_device pointer of the VM
- * @bo_adev: amdgpu_device pointer of the mapped BO
- * @vm: requested vm
+ * @adev: amdgpu_device pointer to use for commands
+ * @vm: the VM to update the range
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
+ * @flush_tlb: trigger tlb invalidation after update completed
* @resv: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
* @offset: offset into nodes and pages_addr
+ * @vram_base: base for vram mappings
* @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
@@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
* Fill in the page table entries between @start and @last.
*
* Returns:
- * 0 for success, -EINVAL for failure.
+ * 0 for success, negative error code for failure.
*/
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct ttm_resource *res,
- dma_addr_t *pages_addr,
- struct dma_fence **fence)
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ struct dma_resv *resv, uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
}
} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
- addr = bo_adev->vm_manager.vram_base_offset +
- cursor.start;
+ addr = vram_base + cursor.start;
} else {
addr = 0;
}
@@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, fence);
- if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
+ if (flush_tlb || params.table_freed) {
tlb_cb->vm = vm;
if (!fence || !*fence ||
dma_fence_add_callback(*fence, &tlb_cb->cb,
@@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
struct dma_fence **last_update;
+ bool flush_tlb = clear;
struct dma_resv *resv;
+ uint64_t vram_base;
uint64_t flags;
- struct amdgpu_device *bo_adev = adev;
int r;
if (clear || !bo) {
@@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
}
if (bo) {
+ struct amdgpu_device *bo_adev = adev;
+
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
if (amdgpu_bo_encrypted(bo))
flags |= AMDGPU_PTE_TMZ;
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ vram_base = bo_adev->vm_manager.vram_base_offset;
} else {
flags = 0x0;
+ vram_base = 0;
}
if (clear || (bo && bo->tbo.base.resv ==
@@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
last_update = &bo_va->last_pt_update;
if (!clear && bo_va->base.moved) {
- bo_va->base.moved = false;
+ flush_tlb = true;
list_splice_init(&bo_va->valids, &bo_va->invalids);
} else if (bo_va->cleared != clear) {
@@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_update(mapping);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
- resv, mapping->start,
- mapping->last, update_flags,
- mapping->offset, mem,
- pages_addr, last_update);
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+ resv, mapping->start, mapping->last,
+ update_flags, mapping->offset,
+ vram_base, mem, pages_addr,
+ last_update);
if (r)
return r;
}
@@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
+ bo_va->base.moved = false;
if (trace_amdgpu_vm_bo_mapping_enabled()) {
list_for_each_entry(mapping, &bo_va->valids, list)
@@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
- resv, mapping->start,
- mapping->last, init_pte_value,
- 0, NULL, NULL, &f);
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
+ mapping->start, mapping->last,
+ init_pte_value, 0, 0, NULL, NULL,
+ &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
goto error_unlock;
}
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
- addr, flags, value, NULL, NULL, NULL);
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
+ addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6b7682fe84f8..1a814fbffff8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct ttm_resource *res,
- dma_addr_t *pages_addr,
- struct dma_fence **fence);
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ struct dma_resv *resv, uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 27533f6057e0..907b02045824 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("[0x%llx 0x%llx]\n", start, last);
- return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
- start, last, init_pte_value, 0,
- NULL, NULL, fence);
+ return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
+ last, init_pte_value, 0, 0, NULL, NULL,
+ fence);
}
static int
@@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
(last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
pte_flags);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
- NULL, last_start,
- prange->start + i, pte_flags,
- last_start - prange->start,
- NULL, dma_addr,
- &vm->last_update);
+ r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
+ last_start, prange->start + i,
+ pte_flags,
+ last_start - prange->start,
+ bo_adev->vm_manager.vram_base_offset,
+ NULL, dma_addr, &vm->last_update);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
--
2.25.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-03-30 9:00 [PATCH] drm/amdgpu: fix TLB flushing during eviction Christian König
@ 2022-03-30 20:51 ` philip yang
2022-03-31 6:27 ` Christian König
0 siblings, 1 reply; 9+ messages in thread
From: philip yang @ 2022-03-30 20:51 UTC (permalink / raw)
To: Christian König, amd-gfx, Philip.Yang, felix.kuehling
Cc: Christian König
[-- Attachment #1: Type: text/html, Size: 10851 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-03-30 20:51 ` philip yang
@ 2022-03-31 6:27 ` Christian König
2022-03-31 14:37 ` Felix Kuehling
0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-03-31 6:27 UTC (permalink / raw)
To: philip yang, amd-gfx, Philip.Yang, felix.kuehling; +Cc: Christian König
Am 30.03.22 um 22:51 schrieb philip yang:
>
>
> On 2022-03-30 05:00, Christian König wrote:
>> Testing the valid bit is not enough to figure out if we
>> need to invalidate the TLB or not.
>>
>> During eviction it is quite likely that we move a BO from VRAM to GTT and
>> update the page tables immediately to the new GTT address.
>>
>> Rework the whole function to get all the necessary parameters directly as
>> value.
>>
>> Signed-off-by: Christian König<christian.koenig@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 ++++++++++++++------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++----
>> 3 files changed, 48 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9992a7311387..1cac90ee3318 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
>> }
>>
>> /**
>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>> + * amdgpu_vm_update_range - update a range in the vm page table
>> *
>> - * @adev: amdgpu_device pointer of the VM
>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>> - * @vm: requested vm
>> + * @adev: amdgpu_device pointer to use for commands
>> + * @vm: the VM to update the range
>> * @immediate: immediate submission in a page fault
>> * @unlocked: unlocked invalidation during MM callback
>> + * @flush_tlb: trigger tlb invalidation after update completed
>> * @resv: fences we need to sync to
>> * @start: start of mapped range
>> * @last: last mapped entry
>> * @flags: flags for the entries
>> * @offset: offset into nodes and pages_addr
>> + * @vram_base: base for vram mappings
>> * @res: ttm_resource to map
>> * @pages_addr: DMA addresses to use for mapping
>> * @fence: optional resulting fence
>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
>> * Fill in the page table entries between @start and @last.
>> *
>> * Returns:
>> - * 0 for success, -EINVAL for failure.
>> + * 0 for success, negative error code for failure.
>> */
>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> - struct amdgpu_device *bo_adev,
>> - struct amdgpu_vm *vm, bool immediate,
>> - bool unlocked, struct dma_resv *resv,
>> - uint64_t start, uint64_t last,
>> - uint64_t flags, uint64_t offset,
>> - struct ttm_resource *res,
>> - dma_addr_t *pages_addr,
>> - struct dma_fence **fence)
>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> + bool immediate, bool unlocked, bool flush_tlb,
>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>> + struct dma_fence **fence)
>> {
>> struct amdgpu_vm_update_params params;
>> struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> }
>>
>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> - addr = bo_adev->vm_manager.vram_base_offset +
>> - cursor.start;
>> + addr = vram_base + cursor.start;
>> } else {
>> addr = 0;
>> }
>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>
>> r = vm->update_funcs->commit(&params, fence);
>>
>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>> + if (flush_tlb || params.table_freed) {
>
> All change look good to me, but when I look at previous changes again,
> kfd_ioctl_map_memory_to_gpu is not correct now, as this should flush
> TLB if (!kfd_flush_tlb_after_unmap(dev)).
>
That was intentionally dropped as Felix said it wouldn't be necessary
any more with the TLB flush rework.
Is that really causing any trouble?
Christian.
> To fix it, seems we need below change, then pass flush_tlb flag via
> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte ->
> amdgpu_vm_bo_update
>
> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
> amdgpu_bo_va *bo_va,
> bool clear)
>
> - bool flush_tlb = clear;
>
> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
> amdgpu_bo_va *bo_va,
> bool clear, bool flush_tlb)
>
> + flush_tlb |= clear;
>
> Regards,
>
> Philip
>
>> tlb_cb->vm = vm;
>> if (!fence || !*fence ||
>> dma_fence_add_callback(*fence, &tlb_cb->cb,
>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>> dma_addr_t *pages_addr = NULL;
>> struct ttm_resource *mem;
>> struct dma_fence **last_update;
>> + bool flush_tlb = clear;
>> struct dma_resv *resv;
>> + uint64_t vram_base;
>> uint64_t flags;
>> - struct amdgpu_device *bo_adev = adev;
>> int r;
>>
>> if (clear || !bo) {
>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>> }
>>
>> if (bo) {
>> + struct amdgpu_device *bo_adev = adev;
>> +
>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>
>> if (amdgpu_bo_encrypted(bo))
>> flags |= AMDGPU_PTE_TMZ;
>>
>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> + vram_base = bo_adev->vm_manager.vram_base_offset;
>> } else {
>> flags = 0x0;
>> + vram_base = 0;
>> }
>>
>> if (clear || (bo && bo->tbo.base.resv ==
>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>> last_update = &bo_va->last_pt_update;
>>
>> if (!clear && bo_va->base.moved) {
>> - bo_va->base.moved = false;
>> + flush_tlb = true;
>> list_splice_init(&bo_va->valids, &bo_va->invalids);
>>
>> } else if (bo_va->cleared != clear) {
>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>
>> trace_amdgpu_vm_bo_update(mapping);
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>> - resv, mapping->start,
>> - mapping->last, update_flags,
>> - mapping->offset, mem,
>> - pages_addr, last_update);
>> + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>> + resv, mapping->start, mapping->last,
>> + update_flags, mapping->offset,
>> + vram_base, mem, pages_addr,
>> + last_update);
>> if (r)
>> return r;
>> }
>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>
>> list_splice_init(&bo_va->invalids, &bo_va->valids);
>> bo_va->cleared = clear;
>> + bo_va->base.moved = false;
>>
>> if (trace_amdgpu_vm_bo_mapping_enabled()) {
>> list_for_each_entry(mapping, &bo_va->valids, list)
>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>> mapping->start < AMDGPU_GMC_HOLE_START)
>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
>> - resv, mapping->start,
>> - mapping->last, init_pte_value,
>> - 0, NULL, NULL, &f);
>> + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
>> + mapping->start, mapping->last,
>> + init_pte_value, 0, 0, NULL, NULL,
>> + &f);
>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>> if (r) {
>> dma_fence_put(f);
>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>> goto error_unlock;
>> }
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
>> - addr, flags, value, NULL, NULL, NULL);
>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
>> + addr, flags, value, 0, NULL, NULL, NULL);
>> if (r)
>> goto error_unlock;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 6b7682fe84f8..1a814fbffff8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>> struct amdgpu_vm *vm);
>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>> struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> - struct amdgpu_device *bo_adev,
>> - struct amdgpu_vm *vm, bool immediate,
>> - bool unlocked, struct dma_resv *resv,
>> - uint64_t start, uint64_t last,
>> - uint64_t flags, uint64_t offset,
>> - struct ttm_resource *res,
>> - dma_addr_t *pages_addr,
>> - struct dma_fence **fence);
>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> + bool immediate, bool unlocked, bool flush_tlb,
>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>> + struct dma_fence **fence);
>> int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>> struct amdgpu_bo_va *bo_va,
>> bool clear);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 27533f6057e0..907b02045824 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>
>> pr_debug("[0x%llx 0x%llx]\n", start, last);
>>
>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
>> - start, last, init_pte_value, 0,
>> - NULL, NULL, fence);
>> + return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
>> + last, init_pte_value, 0, 0, NULL, NULL,
>> + fence);
>> }
>>
>> static int
>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>> pte_flags);
>>
>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>> - NULL, last_start,
>> - prange->start + i, pte_flags,
>> - last_start - prange->start,
>> - NULL, dma_addr,
>> - &vm->last_update);
>> + r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
>> + last_start, prange->start + i,
>> + pte_flags,
>> + last_start - prange->start,
>> + bo_adev->vm_manager.vram_base_offset,
>> + NULL, dma_addr, &vm->last_update);
>>
>> for (j = last_start - prange->start; j <= i; j++)
>> dma_addr[j] |= last_domain;
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-03-31 6:27 ` Christian König
@ 2022-03-31 14:37 ` Felix Kuehling
2022-03-31 15:42 ` philip yang
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Felix Kuehling @ 2022-03-31 14:37 UTC (permalink / raw)
To: Christian König, philip yang, amd-gfx, Philip.Yang
Cc: Christian König
Am 2022-03-31 um 02:27 schrieb Christian König:
> Am 30.03.22 um 22:51 schrieb philip yang:
>>
>>
>> On 2022-03-30 05:00, Christian König wrote:
>>> Testing the valid bit is not enough to figure out if we
>>> need to invalidate the TLB or not.
>>>
>>> During eviction it is quite likely that we move a BO from VRAM to
>>> GTT and
>>> update the page tables immediately to the new GTT address.
>>>
>>> Rework the whole function to get all the necessary parameters
>>> directly as
>>> value.
>>>
>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63
>>> ++++++++++++++------------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++----
>>> 3 files changed, 48 insertions(+), 48 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 9992a7311387..1cac90ee3318 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>> dma_fence *fence,
>>> }
>>> /**
>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>> *
>>> - * @adev: amdgpu_device pointer of the VM
>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>> - * @vm: requested vm
>>> + * @adev: amdgpu_device pointer to use for commands
>>> + * @vm: the VM to update the range
>>> * @immediate: immediate submission in a page fault
>>> * @unlocked: unlocked invalidation during MM callback
>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>> * @resv: fences we need to sync to
>>> * @start: start of mapped range
>>> * @last: last mapped entry
>>> * @flags: flags for the entries
>>> * @offset: offset into nodes and pages_addr
>>> + * @vram_base: base for vram mappings
>>> * @res: ttm_resource to map
>>> * @pages_addr: DMA addresses to use for mapping
>>> * @fence: optional resulting fence
>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>> dma_fence *fence,
>>> * Fill in the page table entries between @start and @last.
>>> *
>>> * Returns:
>>> - * 0 for success, -EINVAL for failure.
>>> + * 0 for success, negative error code for failure.
>>> */
>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>> - struct amdgpu_device *bo_adev,
>>> - struct amdgpu_vm *vm, bool immediate,
>>> - bool unlocked, struct dma_resv *resv,
>>> - uint64_t start, uint64_t last,
>>> - uint64_t flags, uint64_t offset,
>>> - struct ttm_resource *res,
>>> - dma_addr_t *pages_addr,
>>> - struct dma_fence **fence)
>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>> amdgpu_vm *vm,
>>> + bool immediate, bool unlocked, bool flush_tlb,
>>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>> + struct dma_fence **fence)
>>> {
>>> struct amdgpu_vm_update_params params;
>>> struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>> amdgpu_device *adev,
>>> }
>>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> - addr = bo_adev->vm_manager.vram_base_offset +
>>> - cursor.start;
>>> + addr = vram_base + cursor.start;
>>> } else {
>>> addr = 0;
>>> }
>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>> amdgpu_device *adev,
>>> r = vm->update_funcs->commit(&params, fence);
>>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>> + if (flush_tlb || params.table_freed) {
>>
>> All change look good to me, but when I look at previous changes
>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this should
>> flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>
>
> That was intentionally dropped as Felix said it wouldn't be necessary
> any more with the TLB flush rework.
>
> Is that really causing any trouble?
I discussed it with Philip offline. The TLB flushing in
kfd_ioctl_map_memory_to_gpu is still there, but it is no longer
conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb checks
the sequence number to find out if the flush is needed (presumably
because we didn't flush after unmap).
There is one case on Vega20+XGMI where PTEs get inadvertently cached in
L2 texture cache. In that case, we probably need to flush more
frequently because a cache line in L2 may contain stale invalid PTEs. So
kfd_flush_tlb would need to ignore the sequence number and heavy-weight
flush TLB unconditionally in this case.
Regards,
Felix
>
> Christian.
>
>> To fix it, seems we need below change, then pass flush_tlb flag via
>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte ->
>> amdgpu_vm_bo_update
>>
>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>> amdgpu_bo_va *bo_va,
>> bool clear)
>>
>> - bool flush_tlb = clear;
>>
>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>> amdgpu_bo_va *bo_va,
>> bool clear, bool flush_tlb)
>>
>> + flush_tlb |= clear;
>>
>> Regards,
>>
>> Philip
>>
>>> tlb_cb->vm = vm;
>>> if (!fence || !*fence ||
>>> dma_fence_add_callback(*fence, &tlb_cb->cb,
>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>> *adev, struct amdgpu_bo_va *bo_va,
>>> dma_addr_t *pages_addr = NULL;
>>> struct ttm_resource *mem;
>>> struct dma_fence **last_update;
>>> + bool flush_tlb = clear;
>>> struct dma_resv *resv;
>>> + uint64_t vram_base;
>>> uint64_t flags;
>>> - struct amdgpu_device *bo_adev = adev;
>>> int r;
>>> if (clear || !bo) {
>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>> *adev, struct amdgpu_bo_va *bo_va,
>>> }
>>> if (bo) {
>>> + struct amdgpu_device *bo_adev = adev;
>>> +
>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>> if (amdgpu_bo_encrypted(bo))
>>> flags |= AMDGPU_PTE_TMZ;
>>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> + vram_base = bo_adev->vm_manager.vram_base_offset;
>>> } else {
>>> flags = 0x0;
>>> + vram_base = 0;
>>> }
>>> if (clear || (bo && bo->tbo.base.resv ==
>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>> *adev, struct amdgpu_bo_va *bo_va,
>>> last_update = &bo_va->last_pt_update;
>>> if (!clear && bo_va->base.moved) {
>>> - bo_va->base.moved = false;
>>> + flush_tlb = true;
>>> list_splice_init(&bo_va->valids, &bo_va->invalids);
>>> } else if (bo_va->cleared != clear) {
>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>> *adev, struct amdgpu_bo_va *bo_va,
>>> trace_amdgpu_vm_bo_update(mapping);
>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false,
>>> false,
>>> - resv, mapping->start,
>>> - mapping->last, update_flags,
>>> - mapping->offset, mem,
>>> - pages_addr, last_update);
>>> + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>>> + resv, mapping->start, mapping->last,
>>> + update_flags, mapping->offset,
>>> + vram_base, mem, pages_addr,
>>> + last_update);
>>> if (r)
>>> return r;
>>> }
>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>> *adev, struct amdgpu_bo_va *bo_va,
>>> list_splice_init(&bo_va->invalids, &bo_va->valids);
>>> bo_va->cleared = clear;
>>> + bo_va->base.moved = false;
>>> if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>> list_for_each_entry(mapping, &bo_va->valids, list)
>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct
>>> amdgpu_device *adev,
>>> mapping->start < AMDGPU_GMC_HOLE_START)
>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>> false,
>>> - resv, mapping->start,
>>> - mapping->last, init_pte_value,
>>> - 0, NULL, NULL, &f);
>>> + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
>>> + mapping->start, mapping->last,
>>> + init_pte_value, 0, 0, NULL, NULL,
>>> + &f);
>>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>> if (r) {
>>> dma_fence_put(f);
>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct
>>> amdgpu_device *adev, u32 pasid,
>>> goto error_unlock;
>>> }
>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false,
>>> NULL, addr,
>>> - addr, flags, value, NULL, NULL, NULL);
>>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL,
>>> addr,
>>> + addr, flags, value, 0, NULL, NULL, NULL);
>>> if (r)
>>> goto error_unlock;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 6b7682fe84f8..1a814fbffff8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct
>>> amdgpu_device *adev,
>>> struct amdgpu_vm *vm);
>>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>> struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>> - struct amdgpu_device *bo_adev,
>>> - struct amdgpu_vm *vm, bool immediate,
>>> - bool unlocked, struct dma_resv *resv,
>>> - uint64_t start, uint64_t last,
>>> - uint64_t flags, uint64_t offset,
>>> - struct ttm_resource *res,
>>> - dma_addr_t *pages_addr,
>>> - struct dma_fence **fence);
>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>> amdgpu_vm *vm,
>>> + bool immediate, bool unlocked, bool flush_tlb,
>>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>> + struct dma_fence **fence);
>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>> struct amdgpu_bo_va *bo_va,
>>> bool clear);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 27533f6057e0..907b02045824 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device
>>> *adev, struct amdgpu_vm *vm,
>>> pr_debug("[0x%llx 0x%llx]\n", start, last);
>>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>> true, NULL,
>>> - start, last, init_pte_value, 0,
>>> - NULL, NULL, fence);
>>> + return amdgpu_vm_update_range(adev, vm, false, true, true,
>>> NULL, start,
>>> + last, init_pte_value, 0, 0, NULL, NULL,
>>> + fence);
>>> }
>>> static int
>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct
>>> kfd_process_device *pdd, struct svm_range *prange,
>>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>> pte_flags);
>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false,
>>> false,
>>> - NULL, last_start,
>>> - prange->start + i, pte_flags,
>>> - last_start - prange->start,
>>> - NULL, dma_addr,
>>> - &vm->last_update);
>>> + r = amdgpu_vm_update_range(adev, vm, false, false, false,
>>> NULL,
>>> + last_start, prange->start + i,
>>> + pte_flags,
>>> + last_start - prange->start,
>>> + bo_adev->vm_manager.vram_base_offset,
>>> + NULL, dma_addr, &vm->last_update);
>>> for (j = last_start - prange->start; j <= i; j++)
>>> dma_addr[j] |= last_domain;
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-03-31 14:37 ` Felix Kuehling
@ 2022-03-31 15:42 ` philip yang
2022-03-31 15:46 ` philip yang
2022-04-01 8:24 ` Christian König
2 siblings, 0 replies; 9+ messages in thread
From: philip yang @ 2022-03-31 15:42 UTC (permalink / raw)
To: Felix Kuehling, Christian König, amd-gfx, Philip.Yang
Cc: Christian König
[-- Attachment #1: Type: text/html, Size: 30125 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-03-31 14:37 ` Felix Kuehling
2022-03-31 15:42 ` philip yang
@ 2022-03-31 15:46 ` philip yang
2022-04-01 8:24 ` Christian König
2 siblings, 0 replies; 9+ messages in thread
From: philip yang @ 2022-03-31 15:46 UTC (permalink / raw)
To: Felix Kuehling, Christian König, amd-gfx, Philip.Yang
Cc: Christian König
[-- Attachment #1: Type: text/html, Size: 29695 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-03-31 14:37 ` Felix Kuehling
2022-03-31 15:42 ` philip yang
2022-03-31 15:46 ` philip yang
@ 2022-04-01 8:24 ` Christian König
2022-04-01 19:47 ` Felix Kuehling
2 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-04-01 8:24 UTC (permalink / raw)
To: Felix Kuehling, philip yang, amd-gfx, Philip.Yang; +Cc: Christian König
Am 31.03.22 um 16:37 schrieb Felix Kuehling:
> Am 2022-03-31 um 02:27 schrieb Christian König:
>> Am 30.03.22 um 22:51 schrieb philip yang:
>>>
>>>
>>> On 2022-03-30 05:00, Christian König wrote:
>>>> Testing the valid bit is not enough to figure out if we
>>>> need to invalidate the TLB or not.
>>>>
>>>> During eviction it is quite likely that we move a BO from VRAM to
>>>> GTT and
>>>> update the page tables immediately to the new GTT address.
>>>>
>>>> Rework the whole function to get all the necessary parameters
>>>> directly as
>>>> value.
>>>>
>>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63
>>>> ++++++++++++++------------
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++----
>>>> 3 files changed, 48 insertions(+), 48 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 9992a7311387..1cac90ee3318 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>>> dma_fence *fence,
>>>> }
>>>> /**
>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page
>>>> table
>>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>>> *
>>>> - * @adev: amdgpu_device pointer of the VM
>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>>> - * @vm: requested vm
>>>> + * @adev: amdgpu_device pointer to use for commands
>>>> + * @vm: the VM to update the range
>>>> * @immediate: immediate submission in a page fault
>>>> * @unlocked: unlocked invalidation during MM callback
>>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>>> * @resv: fences we need to sync to
>>>> * @start: start of mapped range
>>>> * @last: last mapped entry
>>>> * @flags: flags for the entries
>>>> * @offset: offset into nodes and pages_addr
>>>> + * @vram_base: base for vram mappings
>>>> * @res: ttm_resource to map
>>>> * @pages_addr: DMA addresses to use for mapping
>>>> * @fence: optional resulting fence
>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>>> dma_fence *fence,
>>>> * Fill in the page table entries between @start and @last.
>>>> *
>>>> * Returns:
>>>> - * 0 for success, -EINVAL for failure.
>>>> + * 0 for success, negative error code for failure.
>>>> */
>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>> - struct amdgpu_device *bo_adev,
>>>> - struct amdgpu_vm *vm, bool immediate,
>>>> - bool unlocked, struct dma_resv *resv,
>>>> - uint64_t start, uint64_t last,
>>>> - uint64_t flags, uint64_t offset,
>>>> - struct ttm_resource *res,
>>>> - dma_addr_t *pages_addr,
>>>> - struct dma_fence **fence)
>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>>> amdgpu_vm *vm,
>>>> + bool immediate, bool unlocked, bool flush_tlb,
>>>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>>> + struct dma_fence **fence)
>>>> {
>>>> struct amdgpu_vm_update_params params;
>>>> struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>> }
>>>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>>> - addr = bo_adev->vm_manager.vram_base_offset +
>>>> - cursor.start;
>>>> + addr = vram_base + cursor.start;
>>>> } else {
>>>> addr = 0;
>>>> }
>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>> r = vm->update_funcs->commit(¶ms, fence);
>>>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>>> + if (flush_tlb || params.table_freed) {
>>>
>>> All changes look good to me, but when I look at previous changes
>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this
>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>>
>>
>> That was intentionally dropped as Felix said it wouldn't be necessary
>> any more with the TLB flush rework.
>>
>> Is that really causing any trouble?
>
> I discussed it with Philip offline. The TLB flushing in
> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer
> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb
> checks the sequence number to find out if the flush is needed
> (presumably because we didn't flush after unmap).
>
> There is one case on Vega20+XGMI where PTEs get inadvertently cached
> in L2 texture cache.
Ah, that one. Yeah I do remember that issue.
> In that case, we probably need to flush more frequently because a
> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb
> would need to ignore the sequence number and heavy-weight flush TLB
> unconditionally in this case.
Ok, but I think that is outside of the scope of the VM handling then. Or
should we somehow handle that in the VM code as well?
I mean incrementing the sequence when the involved BO is mapped through
XGMI is trivial. I just can't easily signal that we need a heavy-weight
flush.
Thanks,
Christian.
>
> Regards,
> Felix
>
>>
>> Christian.
>>
>>> To fix it, it seems we need the below change, then pass the flush_tlb flag via
>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte
>>> -> amdgpu_vm_bo_update
>>>
>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>>> amdgpu_bo_va *bo_va,
>>> bool clear)
>>>
>>> - bool flush_tlb = clear;
>>>
>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>>> amdgpu_bo_va *bo_va,
>>> bool clear, bool flush_tlb)
>>>
>>> + flush_tlb |= clear;
>>>
>>> Regards,
>>>
>>> Philip
>>>
>>>> tlb_cb->vm = vm;
>>>> if (!fence || !*fence ||
>>>> dma_fence_add_callback(*fence, &tlb_cb->cb,
>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>> dma_addr_t *pages_addr = NULL;
>>>> struct ttm_resource *mem;
>>>> struct dma_fence **last_update;
>>>> + bool flush_tlb = clear;
>>>> struct dma_resv *resv;
>>>> + uint64_t vram_base;
>>>> uint64_t flags;
>>>> - struct amdgpu_device *bo_adev = adev;
>>>> int r;
>>>> if (clear || !bo) {
>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct
>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>> }
>>>> if (bo) {
>>>> + struct amdgpu_device *bo_adev = adev;
>>>> +
>>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>> if (amdgpu_bo_encrypted(bo))
>>>> flags |= AMDGPU_PTE_TMZ;
>>>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>> + vram_base = bo_adev->vm_manager.vram_base_offset;
>>>> } else {
>>>> flags = 0x0;
>>>> + vram_base = 0;
>>>> }
>>>> if (clear || (bo && bo->tbo.base.resv ==
>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>> last_update = &bo_va->last_pt_update;
>>>> if (!clear && bo_va->base.moved) {
>>>> - bo_va->base.moved = false;
>>>> + flush_tlb = true;
>>>> list_splice_init(&bo_va->valids, &bo_va->invalids);
>>>> } else if (bo_va->cleared != clear) {
>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct
>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>> trace_amdgpu_vm_bo_update(mapping);
>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm,
>>>> false, false,
>>>> - resv, mapping->start,
>>>> - mapping->last, update_flags,
>>>> - mapping->offset, mem,
>>>> - pages_addr, last_update);
>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>>>> + resv, mapping->start, mapping->last,
>>>> + update_flags, mapping->offset,
>>>> + vram_base, mem, pages_addr,
>>>> + last_update);
>>>> if (r)
>>>> return r;
>>>> }
>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>> list_splice_init(&bo_va->invalids, &bo_va->valids);
>>>> bo_va->cleared = clear;
>>>> + bo_va->base.moved = false;
>>>> if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>>> list_for_each_entry(mapping, &bo_va->valids, list)
>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct
>>>> amdgpu_device *adev,
>>>> mapping->start < AMDGPU_GMC_HOLE_START)
>>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>>> false,
>>>> - resv, mapping->start,
>>>> - mapping->last, init_pte_value,
>>>> - 0, NULL, NULL, &f);
>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, true,
>>>> resv,
>>>> + mapping->start, mapping->last,
>>>> + init_pte_value, 0, 0, NULL, NULL,
>>>> + &f);
>>>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>> if (r) {
>>>> dma_fence_put(f);
>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct
>>>> amdgpu_device *adev, u32 pasid,
>>>> goto error_unlock;
>>>> }
>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false,
>>>> NULL, addr,
>>>> - addr, flags, value, NULL, NULL, NULL);
>>>> + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL,
>>>> addr,
>>>> + addr, flags, value, 0, NULL, NULL, NULL);
>>>> if (r)
>>>> goto error_unlock;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index 6b7682fe84f8..1a814fbffff8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct
>>>> amdgpu_device *adev,
>>>> struct amdgpu_vm *vm);
>>>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>> struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>> - struct amdgpu_device *bo_adev,
>>>> - struct amdgpu_vm *vm, bool immediate,
>>>> - bool unlocked, struct dma_resv *resv,
>>>> - uint64_t start, uint64_t last,
>>>> - uint64_t flags, uint64_t offset,
>>>> - struct ttm_resource *res,
>>>> - dma_addr_t *pages_addr,
>>>> - struct dma_fence **fence);
>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>>> amdgpu_vm *vm,
>>>> + bool immediate, bool unlocked, bool flush_tlb,
>>>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>>> + struct dma_fence **fence);
>>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>> struct amdgpu_bo_va *bo_va,
>>>> bool clear);
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> index 27533f6057e0..907b02045824 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device
>>>> *adev, struct amdgpu_vm *vm,
>>>> pr_debug("[0x%llx 0x%llx]\n", start, last);
>>>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>>> true, NULL,
>>>> - start, last, init_pte_value, 0,
>>>> - NULL, NULL, fence);
>>>> + return amdgpu_vm_update_range(adev, vm, false, true, true,
>>>> NULL, start,
>>>> + last, init_pte_value, 0, 0, NULL, NULL,
>>>> + fence);
>>>> }
>>>> static int
>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct
>>>> kfd_process_device *pdd, struct svm_range *prange,
>>>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>>> pte_flags);
>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm,
>>>> false, false,
>>>> - NULL, last_start,
>>>> - prange->start + i, pte_flags,
>>>> - last_start - prange->start,
>>>> - NULL, dma_addr,
>>>> - &vm->last_update);
>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, false,
>>>> NULL,
>>>> + last_start, prange->start + i,
>>>> + pte_flags,
>>>> + last_start - prange->start,
>>>> + bo_adev->vm_manager.vram_base_offset,
>>>> + NULL, dma_addr, &vm->last_update);
>>>> for (j = last_start - prange->start; j <= i; j++)
>>>> dma_addr[j] |= last_domain;
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-04-01 8:24 ` Christian König
@ 2022-04-01 19:47 ` Felix Kuehling
2022-04-03 15:09 ` Christian König
0 siblings, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2022-04-01 19:47 UTC (permalink / raw)
To: Christian König, philip yang, amd-gfx, Philip.Yang
Cc: Christian König
On 2022-04-01 04:24, Christian König wrote:
> Am 31.03.22 um 16:37 schrieb Felix Kuehling:
>> Am 2022-03-31 um 02:27 schrieb Christian König:
>>> Am 30.03.22 um 22:51 schrieb philip yang:
>>>>
>>>>
>>>> On 2022-03-30 05:00, Christian König wrote:
>>>>> Testing the valid bit is not enough to figure out if we
>>>>> need to invalidate the TLB or not.
>>>>>
>>>>> During eviction it is quite likely that we move a BO from VRAM to
>>>>> GTT and
>>>>> update the page tables immediately to the new GTT address.
>>>>>
>>>>> Rework the whole function to get all the necessary parameters
>>>>> directly as
>>>>> value.
>>>>>
>>>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>>>> ---
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63
>>>>> ++++++++++++++------------
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++----
>>>>> 3 files changed, 48 insertions(+), 48 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 9992a7311387..1cac90ee3318 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>>>> dma_fence *fence,
>>>>> }
>>>>> /**
>>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page
>>>>> table
>>>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>>>> *
>>>>> - * @adev: amdgpu_device pointer of the VM
>>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>>>> - * @vm: requested vm
>>>>> + * @adev: amdgpu_device pointer to use for commands
>>>>> + * @vm: the VM to update the range
>>>>> * @immediate: immediate submission in a page fault
>>>>> * @unlocked: unlocked invalidation during MM callback
>>>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>>>> * @resv: fences we need to sync to
>>>>> * @start: start of mapped range
>>>>> * @last: last mapped entry
>>>>> * @flags: flags for the entries
>>>>> * @offset: offset into nodes and pages_addr
>>>>> + * @vram_base: base for vram mappings
>>>>> * @res: ttm_resource to map
>>>>> * @pages_addr: DMA addresses to use for mapping
>>>>> * @fence: optional resulting fence
>>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>>>> dma_fence *fence,
>>>>> * Fill in the page table entries between @start and @last.
>>>>> *
>>>>> * Returns:
>>>>> - * 0 for success, -EINVAL for failure.
>>>>> + * 0 for success, negative error code for failure.
>>>>> */
>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>> - struct amdgpu_device *bo_adev,
>>>>> - struct amdgpu_vm *vm, bool immediate,
>>>>> - bool unlocked, struct dma_resv *resv,
>>>>> - uint64_t start, uint64_t last,
>>>>> - uint64_t flags, uint64_t offset,
>>>>> - struct ttm_resource *res,
>>>>> - dma_addr_t *pages_addr,
>>>>> - struct dma_fence **fence)
>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>>>> amdgpu_vm *vm,
>>>>> + bool immediate, bool unlocked, bool flush_tlb,
>>>>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>> + struct dma_fence **fence)
>>>>> {
>>>>> struct amdgpu_vm_update_params params;
>>>>> struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>> amdgpu_device *adev,
>>>>> }
>>>>> } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>>>> - addr = bo_adev->vm_manager.vram_base_offset +
>>>>> - cursor.start;
>>>>> + addr = vram_base + cursor.start;
>>>>> } else {
>>>>> addr = 0;
>>>>> }
>>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>> amdgpu_device *adev,
>>>>> r = vm->update_funcs->commit(¶ms, fence);
>>>>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>>>> + if (flush_tlb || params.table_freed) {
>>>>
>>>> All changes look good to me, but when I look at previous changes
>>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this
>>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>>>
>>>
>>> That was intentionally dropped as Felix said it wouldn't be
>>> necessary any more with the TLB flush rework.
>>>
>>> Is that really causing any trouble?
>>
>> I discussed it with Philip offline. The TLB flushing in
>> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer
>> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb
>> checks the sequence number to find out if the flush is needed
>> (presumably because we didn't flush after unmap).
>>
>> There is one case on Vega20+XGMI where PTEs get inadvertently cached
>> in L2 texture cache.
>
> Ah, that one. Yeah I do remember that issue.
>
>> In that case, we probably need to flush more frequently because a
>> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb
>> would need to ignore the sequence number and heavy-weight flush TLB
>> unconditionally in this case.
>
> Ok, but I think that is outside of the scope of the VM handling then.
> Or should we somehow handle that in the VM code as well?
I think it would make sense to apply the workaround in the VM code now.
You'd need to do this on all mappings on Vega20+XGMI. It doesn't matter
whether the mapping itself involves XGMI. AIUI, the incorrect caching
happens for all mappings when the XGMI bridge is physically present.
>
> I mean incrementing the sequence when the involved BO is mapped
> through XGMI is trivial. I just can't easily signal that we need a
> heavy-weight flush.
There is already code in gmc_v9_0.c to force heavy-weight flushing, and
doing an double flush to make sure the TLB is flushed after the L2
texture cache. grep -A4 "Vega20+XGMI" gmc_v9_0.c for details.
Regards,
Felix
>
> Thanks,
> Christian.
>
>>
>> Regards,
>> Felix
>>
>>>
>>> Christian.
>>>
>>>> To fix it, it seems we need the below change, then pass the flush_tlb flag via
>>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte
>>>> -> amdgpu_vm_bo_update
>>>>
>>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>>>> amdgpu_bo_va *bo_va,
>>>> bool clear)
>>>>
>>>> - bool flush_tlb = clear;
>>>>
>>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>>>> amdgpu_bo_va *bo_va,
>>>> bool clear, bool flush_tlb)
>>>>
>>>> + flush_tlb |= clear;
>>>>
>>>> Regards,
>>>>
>>>> Philip
>>>>
>>>>> tlb_cb->vm = vm;
>>>>> if (!fence || !*fence ||
>>>>> dma_fence_add_callback(*fence, &tlb_cb->cb,
>>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct
>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>> dma_addr_t *pages_addr = NULL;
>>>>> struct ttm_resource *mem;
>>>>> struct dma_fence **last_update;
>>>>> + bool flush_tlb = clear;
>>>>> struct dma_resv *resv;
>>>>> + uint64_t vram_base;
>>>>> uint64_t flags;
>>>>> - struct amdgpu_device *bo_adev = adev;
>>>>> int r;
>>>>> if (clear || !bo) {
>>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct
>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>> }
>>>>> if (bo) {
>>>>> + struct amdgpu_device *bo_adev = adev;
>>>>> +
>>>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>>> if (amdgpu_bo_encrypted(bo))
>>>>> flags |= AMDGPU_PTE_TMZ;
>>>>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> + vram_base = bo_adev->vm_manager.vram_base_offset;
>>>>> } else {
>>>>> flags = 0x0;
>>>>> + vram_base = 0;
>>>>> }
>>>>> if (clear || (bo && bo->tbo.base.resv ==
>>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>> last_update = &bo_va->last_pt_update;
>>>>> if (!clear && bo_va->base.moved) {
>>>>> - bo_va->base.moved = false;
>>>>> + flush_tlb = true;
>>>>> list_splice_init(&bo_va->valids, &bo_va->invalids);
>>>>> } else if (bo_va->cleared != clear) {
>>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct
>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>> trace_amdgpu_vm_bo_update(mapping);
>>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm,
>>>>> false, false,
>>>>> - resv, mapping->start,
>>>>> - mapping->last, update_flags,
>>>>> - mapping->offset, mem,
>>>>> - pages_addr, last_update);
>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false,
>>>>> flush_tlb,
>>>>> + resv, mapping->start, mapping->last,
>>>>> + update_flags, mapping->offset,
>>>>> + vram_base, mem, pages_addr,
>>>>> + last_update);
>>>>> if (r)
>>>>> return r;
>>>>> }
>>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>> list_splice_init(&bo_va->invalids, &bo_va->valids);
>>>>> bo_va->cleared = clear;
>>>>> + bo_va->base.moved = false;
>>>>> if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>>>> list_for_each_entry(mapping, &bo_va->valids, list)
>>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct
>>>>> amdgpu_device *adev,
>>>>> mapping->start < AMDGPU_GMC_HOLE_START)
>>>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>>>> false,
>>>>> - resv, mapping->start,
>>>>> - mapping->last, init_pte_value,
>>>>> - 0, NULL, NULL, &f);
>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, true,
>>>>> resv,
>>>>> + mapping->start, mapping->last,
>>>>> + init_pte_value, 0, 0, NULL, NULL,
>>>>> + &f);
>>>>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>> if (r) {
>>>>> dma_fence_put(f);
>>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct
>>>>> amdgpu_device *adev, u32 pasid,
>>>>> goto error_unlock;
>>>>> }
>>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true,
>>>>> false, NULL, addr,
>>>>> - addr, flags, value, NULL, NULL, NULL);
>>>>> + r = amdgpu_vm_update_range(adev, vm, true, false, false,
>>>>> NULL, addr,
>>>>> + addr, flags, value, 0, NULL, NULL, NULL);
>>>>> if (r)
>>>>> goto error_unlock;
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index 6b7682fe84f8..1a814fbffff8 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct
>>>>> amdgpu_device *adev,
>>>>> struct amdgpu_vm *vm);
>>>>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>>> struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>> - struct amdgpu_device *bo_adev,
>>>>> - struct amdgpu_vm *vm, bool immediate,
>>>>> - bool unlocked, struct dma_resv *resv,
>>>>> - uint64_t start, uint64_t last,
>>>>> - uint64_t flags, uint64_t offset,
>>>>> - struct ttm_resource *res,
>>>>> - dma_addr_t *pages_addr,
>>>>> - struct dma_fence **fence);
>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>>>> amdgpu_vm *vm,
>>>>> + bool immediate, bool unlocked, bool flush_tlb,
>>>>> + struct dma_resv *resv, uint64_t start, uint64_t last,
>>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>> + struct dma_fence **fence);
>>>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>> struct amdgpu_bo_va *bo_va,
>>>>> bool clear);
>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>> index 27533f6057e0..907b02045824 100644
>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>> pr_debug("[0x%llx 0x%llx]\n", start, last);
>>>>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>>>> true, NULL,
>>>>> - start, last, init_pte_value, 0,
>>>>> - NULL, NULL, fence);
>>>>> + return amdgpu_vm_update_range(adev, vm, false, true, true,
>>>>> NULL, start,
>>>>> + last, init_pte_value, 0, 0, NULL, NULL,
>>>>> + fence);
>>>>> }
>>>>> static int
>>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct
>>>>> kfd_process_device *pdd, struct svm_range *prange,
>>>>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>>>> pte_flags);
>>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm,
>>>>> false, false,
>>>>> - NULL, last_start,
>>>>> - prange->start + i, pte_flags,
>>>>> - last_start - prange->start,
>>>>> - NULL, dma_addr,
>>>>> - &vm->last_update);
>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, false,
>>>>> NULL,
>>>>> + last_start, prange->start + i,
>>>>> + pte_flags,
>>>>> + last_start - prange->start,
>>>>> + bo_adev->vm_manager.vram_base_offset,
>>>>> + NULL, dma_addr, &vm->last_update);
>>>>> for (j = last_start - prange->start; j <= i; j++)
>>>>> dma_addr[j] |= last_domain;
>>>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
2022-04-01 19:47 ` Felix Kuehling
@ 2022-04-03 15:09 ` Christian König
0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2022-04-03 15:09 UTC (permalink / raw)
To: Felix Kuehling, Christian König, philip yang, amd-gfx, Philip.Yang
Am 01.04.22 um 21:47 schrieb Felix Kuehling:
>
> On 2022-04-01 04:24, Christian König wrote:
>> Am 31.03.22 um 16:37 schrieb Felix Kuehling:
>>> Am 2022-03-31 um 02:27 schrieb Christian König:
>>>> Am 30.03.22 um 22:51 schrieb philip yang:
>>>>>
>>>>>
>>>>> On 2022-03-30 05:00, Christian König wrote:
>>>>>> Testing the valid bit is not enough to figure out if we
>>>>>> need to invalidate the TLB or not.
>>>>>>
>>>>>> During eviction it is quite likely that we move a BO from VRAM to
>>>>>> GTT and
>>>>>> update the page tables immediately to the new GTT address.
>>>>>>
>>>>>> Rework the whole function to get all the necessary parameters
>>>>>> directly as
>>>>>> value.
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> ---
>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63
>>>>>> ++++++++++++++------------
>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>>>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 18 ++++----
>>>>>> 3 files changed, 48 insertions(+), 48 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index 9992a7311387..1cac90ee3318 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>>>>> dma_fence *fence,
>>>>>> }
>>>>>> /**
>>>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page
>>>>>> table
>>>>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>>>>> *
>>>>>> - * @adev: amdgpu_device pointer of the VM
>>>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>>>>> - * @vm: requested vm
>>>>>> + * @adev: amdgpu_device pointer to use for commands
>>>>>> + * @vm: the VM to update the range
>>>>>> * @immediate: immediate submission in a page fault
>>>>>> * @unlocked: unlocked invalidation during MM callback
>>>>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>>>>> * @resv: fences we need to sync to
>>>>>> * @start: start of mapped range
>>>>>> * @last: last mapped entry
>>>>>> * @flags: flags for the entries
>>>>>> * @offset: offset into nodes and pages_addr
>>>>>> + * @vram_base: base for vram mappings
>>>>>> * @res: ttm_resource to map
>>>>>> * @pages_addr: DMA addresses to use for mapping
>>>>>> * @fence: optional resulting fence
>>>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct
>>>>>> dma_fence *fence,
>>>>>> * Fill in the page table entries between @start and @last.
>>>>>> *
>>>>>> * Returns:
>>>>>> - * 0 for success, -EINVAL for failure.
>>>>>> + * 0 for success, negative error code for failure.
>>>>>> */
>>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>>> - struct amdgpu_device *bo_adev,
>>>>>> - struct amdgpu_vm *vm, bool immediate,
>>>>>> - bool unlocked, struct dma_resv *resv,
>>>>>> - uint64_t start, uint64_t last,
>>>>>> - uint64_t flags, uint64_t offset,
>>>>>> - struct ttm_resource *res,
>>>>>> - dma_addr_t *pages_addr,
>>>>>> - struct dma_fence **fence)
>>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>>>>> amdgpu_vm *vm,
>>>>>> + bool immediate, bool unlocked, bool flush_tlb,
>>>>>> + struct dma_resv *resv, uint64_t start, uint64_t
>>>>>> last,
>>>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>>> + struct dma_fence **fence)
>>>>>> {
>>>>>> struct amdgpu_vm_update_params params;
>>>>>> struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>> }
>>>>>> } else if (flags & (AMDGPU_PTE_VALID |
>>>>>> AMDGPU_PTE_PRT)) {
>>>>>> - addr = bo_adev->vm_manager.vram_base_offset +
>>>>>> - cursor.start;
>>>>>> + addr = vram_base + cursor.start;
>>>>>> } else {
>>>>>> addr = 0;
>>>>>> }
>>>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>> r = vm->update_funcs->commit(¶ms, fence);
>>>>>> - if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>>>>> + if (flush_tlb || params.table_freed) {
>>>>>
>>>>> All change look good to me, but when I look at previous changes
>>>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this
>>>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>>>>
>>>>
>>>> That was intentionally dropped as Felix said it wouldn't be
>>>> necessary any more with the TLB flush rework.
>>>>
>>>> Is that really causing any trouble?
>>>
>>> I discussed it with Philip offline. The TLB flushing in
>>> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer
>>> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb
>>> checks the sequence number to find out if the flush is needed
>>> (presumably because we didn't flush after unmap).
>>>
>>> There is one case on Vega20+XGMI where PTEs get inadvertently cached
>>> in L2 texture cache.
>>
>> Ah, that one. Yeah I do remember that issue.
>>
>>> In that case, we probably need to flush more frequently because a
>>> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb
>>> would need to ignore the sequence number and heavy-weight flush TLB
>>> unconditionally in this case.
>>
>> Ok, but I think that is outside of the scope of the VM handling then.
>> Or should we somehow handle that in the VM code as well?
>
> I think it would make sense to apply the workaround in the VM code
> now. You'd need to do this on all mappings on Vega20+XGMI. It doesn't
> matter whether the mapping itself involves XGMI. AIUI, the incorrect
> caching happens for all mappings when the XGMI bridge is physically
> present.
Good point! Looks like Philip already send a patch for this, going to
review that one then.
Thanks,
Christian.
>
>
>>
>> I mean incrementing the sequence when the involved BO is mapped
>> through XGMI is trivial. I just can't easily signal that we need a
>> heavy-weight flush.
>
> There is already code in gmc_v9_0.c to force heavy-weight flushing,
> and doing a double flush to make sure the TLB is flushed after the L2
> texture cache. grep -A4 "Vega20+XGMI" gmc_v9_0.c for details.
>
> Regards,
> Felix
>
>
>>
>> Thanks,
>> Christian.
>>
>>>
>>> Regards,
>>> Felix
>>>
>>>>
>>>> Christian.
>>>>
>>>>> To fix it, seems we need below change, then pass flush_tlb flag via
>>>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte
>>>>> -> amdgpu_vm_bo_update
>>>>>
>>>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>>>>> amdgpu_bo_va *bo_va,
>>>>> bool clear)
>>>>>
>>>>> - bool flush_tlb = clear;
>>>>>
>>>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct
>>>>> amdgpu_bo_va *bo_va,
>>>>> bool clear, bool flush_tlb)
>>>>>
>>>>> + flush_tlb |= clear;
>>>>>
>>>>> Regards,
>>>>>
>>>>> Philip
>>>>>
>>>>>> tlb_cb->vm = vm;
>>>>>> if (!fence || !*fence ||
>>>>>> dma_fence_add_callback(*fence, &tlb_cb->cb,
>>>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>> dma_addr_t *pages_addr = NULL;
>>>>>> struct ttm_resource *mem;
>>>>>> struct dma_fence **last_update;
>>>>>> + bool flush_tlb = clear;
>>>>>> struct dma_resv *resv;
>>>>>> + uint64_t vram_base;
>>>>>> uint64_t flags;
>>>>>> - struct amdgpu_device *bo_adev = adev;
>>>>>> int r;
>>>>>> if (clear || !bo) {
>>>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>> }
>>>>>> if (bo) {
>>>>>> + struct amdgpu_device *bo_adev = adev;
>>>>>> +
>>>>>> flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>>>> if (amdgpu_bo_encrypted(bo))
>>>>>> flags |= AMDGPU_PTE_TMZ;
>>>>>> bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>>> + vram_base = bo_adev->vm_manager.vram_base_offset;
>>>>>> } else {
>>>>>> flags = 0x0;
>>>>>> + vram_base = 0;
>>>>>> }
>>>>>> if (clear || (bo && bo->tbo.base.resv ==
>>>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>> last_update = &bo_va->last_pt_update;
>>>>>> if (!clear && bo_va->base.moved) {
>>>>>> - bo_va->base.moved = false;
>>>>>> + flush_tlb = true;
>>>>>> list_splice_init(&bo_va->valids, &bo_va->invalids);
>>>>>> } else if (bo_va->cleared != clear) {
>>>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>> trace_amdgpu_vm_bo_update(mapping);
>>>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm,
>>>>>> false, false,
>>>>>> - resv, mapping->start,
>>>>>> - mapping->last, update_flags,
>>>>>> - mapping->offset, mem,
>>>>>> - pages_addr, last_update);
>>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false,
>>>>>> flush_tlb,
>>>>>> + resv, mapping->start, mapping->last,
>>>>>> + update_flags, mapping->offset,
>>>>>> + vram_base, mem, pages_addr,
>>>>>> + last_update);
>>>>>> if (r)
>>>>>> return r;
>>>>>> }
>>>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>> list_splice_init(&bo_va->invalids, &bo_va->valids);
>>>>>> bo_va->cleared = clear;
>>>>>> + bo_va->base.moved = false;
>>>>>> if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>>>>> list_for_each_entry(mapping, &bo_va->valids, list)
>>>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct
>>>>>> amdgpu_device *adev,
>>>>>> mapping->start < AMDGPU_GMC_HOLE_START)
>>>>>> init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>>>>> false,
>>>>>> - resv, mapping->start,
>>>>>> - mapping->last, init_pte_value,
>>>>>> - 0, NULL, NULL, &f);
>>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false, true,
>>>>>> resv,
>>>>>> + mapping->start, mapping->last,
>>>>>> + init_pte_value, 0, 0, NULL, NULL,
>>>>>> + &f);
>>>>>> amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>>> if (r) {
>>>>>> dma_fence_put(f);
>>>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct
>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>> goto error_unlock;
>>>>>> }
>>>>>> - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true,
>>>>>> false, NULL, addr,
>>>>>> - addr, flags, value, NULL, NULL, NULL);
>>>>>> + r = amdgpu_vm_update_range(adev, vm, true, false, false,
>>>>>> NULL, addr,
>>>>>> + addr, flags, value, 0, NULL, NULL, NULL);
>>>>>> if (r)
>>>>>> goto error_unlock;
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> index 6b7682fe84f8..1a814fbffff8 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct
>>>>>> amdgpu_device *adev,
>>>>>> struct amdgpu_vm *vm);
>>>>>> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>>>> struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>>> - struct amdgpu_device *bo_adev,
>>>>>> - struct amdgpu_vm *vm, bool immediate,
>>>>>> - bool unlocked, struct dma_resv *resv,
>>>>>> - uint64_t start, uint64_t last,
>>>>>> - uint64_t flags, uint64_t offset,
>>>>>> - struct ttm_resource *res,
>>>>>> - dma_addr_t *pages_addr,
>>>>>> - struct dma_fence **fence);
>>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct
>>>>>> amdgpu_vm *vm,
>>>>>> + bool immediate, bool unlocked, bool flush_tlb,
>>>>>> + struct dma_resv *resv, uint64_t start, uint64_t
>>>>>> last,
>>>>>> + uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>>> + struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>>> + struct dma_fence **fence);
>>>>>> int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>>> struct amdgpu_bo_va *bo_va,
>>>>>> bool clear);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> index 27533f6057e0..907b02045824 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>> pr_debug("[0x%llx 0x%llx]\n", start, last);
>>>>>> - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>>>>> true, NULL,
>>>>>> - start, last, init_pte_value, 0,
>>>>>> - NULL, NULL, fence);
>>>>>> + return amdgpu_vm_update_range(adev, vm, false, true, true,
>>>>>> NULL, start,
>>>>>> + last, init_pte_value, 0, 0, NULL, NULL,
>>>>>> + fence);
>>>>>> }
>>>>>> static int
>>>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct
>>>>>> kfd_process_device *pdd, struct svm_range *prange,
>>>>>> (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>>>>> pte_flags);
>>>>>> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm,
>>>>>> false, false,
>>>>>> - NULL, last_start,
>>>>>> - prange->start + i, pte_flags,
>>>>>> - last_start - prange->start,
>>>>>> - NULL, dma_addr,
>>>>>> - &vm->last_update);
>>>>>> + r = amdgpu_vm_update_range(adev, vm, false, false,
>>>>>> false, NULL,
>>>>>> + last_start, prange->start + i,
>>>>>> + pte_flags,
>>>>>> + last_start - prange->start,
>>>>>> + bo_adev->vm_manager.vram_base_offset,
>>>>>> + NULL, dma_addr, &vm->last_update);
>>>>>> for (j = last_start - prange->start; j <= i; j++)
>>>>>> dma_addr[j] |= last_domain;
>>>>
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-04-03 15:09 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-30 9:00 [PATCH] drm/amdgpu: fix TLB flushing during eviction Christian König
2022-03-30 20:51 ` philip yang
2022-03-31 6:27 ` Christian König
2022-03-31 14:37 ` Felix Kuehling
2022-03-31 15:42 ` philip yang
2022-03-31 15:46 ` philip yang
2022-04-01 8:24 ` Christian König
2022-04-01 19:47 ` Felix Kuehling
2022-04-03 15:09 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.