All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: fix TLB flushing during eviction
@ 2022-03-30  9:00 Christian König
  2022-03-30 20:51 ` philip yang
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-03-30  9:00 UTC (permalink / raw)
  To: amd-gfx, Philip.Yang, felix.kuehling; +Cc: Christian König

Testing the valid bit is not enough to figure out if we
need to invalidate the TLB or not.

During eviction it is quite likely that we move a BO from VRAM to GTT and
update the page tables immediately to the new GTT address.

Rework the whole function to get all the necessary parameters directly as
value.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 ++++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 18 ++++----
 3 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9992a7311387..1cac90ee3318 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
 }
 
 /**
- * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
+ * amdgpu_vm_update_range - update a range in the vm page table
  *
- * @adev: amdgpu_device pointer of the VM
- * @bo_adev: amdgpu_device pointer of the mapped BO
- * @vm: requested vm
+ * @adev: amdgpu_device pointer to use for commands
+ * @vm: the VM to update the range
  * @immediate: immediate submission in a page fault
  * @unlocked: unlocked invalidation during MM callback
+ * @flush_tlb: trigger tlb invalidation after update completed
  * @resv: fences we need to sync to
  * @start: start of mapped range
  * @last: last mapped entry
  * @flags: flags for the entries
  * @offset: offset into nodes and pages_addr
+ * @vram_base: base for vram mappings
  * @res: ttm_resource to map
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
@@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
  * Fill in the page table entries between @start and @last.
  *
  * Returns:
- * 0 for success, -EINVAL for failure.
+ * 0 for success, negative error code for failure.
  */
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				struct amdgpu_device *bo_adev,
-				struct amdgpu_vm *vm, bool immediate,
-				bool unlocked, struct dma_resv *resv,
-				uint64_t start, uint64_t last,
-				uint64_t flags, uint64_t offset,
-				struct ttm_resource *res,
-				dma_addr_t *pages_addr,
-				struct dma_fence **fence)
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			   bool immediate, bool unlocked, bool flush_tlb,
+			   struct dma_resv *resv, uint64_t start, uint64_t last,
+			   uint64_t flags, uint64_t offset, uint64_t vram_base,
+			   struct ttm_resource *res, dma_addr_t *pages_addr,
+			   struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
 	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 			}
 
 		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-			addr = bo_adev->vm_manager.vram_base_offset +
-				cursor.start;
+			addr = vram_base + cursor.start;
 		} else {
 			addr = 0;
 		}
@@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	r = vm->update_funcs->commit(&params, fence);
 
-	if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
+	if (flush_tlb || params.table_freed) {
 		tlb_cb->vm = vm;
 		if (!fence || !*fence ||
 		    dma_fence_add_callback(*fence, &tlb_cb->cb,
@@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 	dma_addr_t *pages_addr = NULL;
 	struct ttm_resource *mem;
 	struct dma_fence **last_update;
+	bool flush_tlb = clear;
 	struct dma_resv *resv;
+	uint64_t vram_base;
 	uint64_t flags;
-	struct amdgpu_device *bo_adev = adev;
 	int r;
 
 	if (clear || !bo) {
@@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 	}
 
 	if (bo) {
+		struct amdgpu_device *bo_adev = adev;
+
 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
 
 		if (amdgpu_bo_encrypted(bo))
 			flags |= AMDGPU_PTE_TMZ;
 
 		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+		vram_base = bo_adev->vm_manager.vram_base_offset;
 	} else {
 		flags = 0x0;
+		vram_base = 0;
 	}
 
 	if (clear || (bo && bo->tbo.base.resv ==
@@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 		last_update = &bo_va->last_pt_update;
 
 	if (!clear && bo_va->base.moved) {
-		bo_va->base.moved = false;
+		flush_tlb = true;
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
 
 	} else if (bo_va->cleared != clear) {
@@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 
 		trace_amdgpu_vm_bo_update(mapping);
 
-		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
-						resv, mapping->start,
-						mapping->last, update_flags,
-						mapping->offset, mem,
-						pages_addr, last_update);
+		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+					   resv, mapping->start, mapping->last,
+					   update_flags, mapping->offset,
+					   vram_base, mem, pages_addr,
+					   last_update);
 		if (r)
 			return r;
 	}
@@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 
 	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	bo_va->cleared = clear;
+	bo_va->base.moved = false;
 
 	if (trace_amdgpu_vm_bo_mapping_enabled()) {
 		list_for_each_entry(mapping, &bo_va->valids, list)
@@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
-		r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
-						resv, mapping->start,
-						mapping->last, init_pte_value,
-						0, NULL, NULL, &f);
+		r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
+					   mapping->start, mapping->last,
+					   init_pte_value, 0, 0, NULL, NULL,
+					   &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
@@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 		goto error_unlock;
 	}
 
-	r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
-					addr, flags, value, NULL, NULL, NULL);
+	r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
+				   addr, flags, value, 0, NULL, NULL, NULL);
 	if (r)
 		goto error_unlock;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6b7682fe84f8..1a814fbffff8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm);
 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 			    struct amdgpu_vm *vm, struct amdgpu_bo *bo);
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				struct amdgpu_device *bo_adev,
-				struct amdgpu_vm *vm, bool immediate,
-				bool unlocked, struct dma_resv *resv,
-				uint64_t start, uint64_t last,
-				uint64_t flags, uint64_t offset,
-				struct ttm_resource *res,
-				dma_addr_t *pages_addr,
-				struct dma_fence **fence);
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			   bool immediate, bool unlocked, bool flush_tlb,
+			   struct dma_resv *resv, uint64_t start, uint64_t last,
+			   uint64_t flags, uint64_t offset, uint64_t vram_base,
+			   struct ttm_resource *res, dma_addr_t *pages_addr,
+			   struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 27533f6057e0..907b02045824 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	pr_debug("[0x%llx 0x%llx]\n", start, last);
 
-	return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
-					   start, last, init_pte_value, 0,
-					   NULL, NULL, fence);
+	return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
+				      last, init_pte_value, 0, 0, NULL, NULL,
+				      fence);
 }
 
 static int
@@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
 			 pte_flags);
 
-		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
-						NULL, last_start,
-						prange->start + i, pte_flags,
-						last_start - prange->start,
-						NULL, dma_addr,
-						&vm->last_update);
+		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
+					   last_start, prange->start + i,
+					   pte_flags,
+					   last_start - prange->start,
+					   bo_adev->vm_manager.vram_base_offset,
+					   NULL, dma_addr, &vm->last_update);
 
 		for (j = last_start - prange->start; j <= i; j++)
 			dma_addr[j] |= last_domain;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-03-30  9:00 [PATCH] drm/amdgpu: fix TLB flushing during eviction Christian König
@ 2022-03-30 20:51 ` philip yang
  2022-03-31  6:27   ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: philip yang @ 2022-03-30 20:51 UTC (permalink / raw)
  To: Christian König, amd-gfx, Philip.Yang, felix.kuehling
  Cc: Christian König

[-- Attachment #1: Type: text/html, Size: 10851 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-03-30 20:51 ` philip yang
@ 2022-03-31  6:27   ` Christian König
  2022-03-31 14:37     ` Felix Kuehling
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-03-31  6:27 UTC (permalink / raw)
  To: philip yang, amd-gfx, Philip.Yang, felix.kuehling; +Cc: Christian König

Am 30.03.22 um 22:51 schrieb philip yang:
>
>
> On 2022-03-30 05:00, Christian König wrote:
>> Testing the valid bit is not enough to figure out if we
>> need to invalidate the TLB or not.
>>
>> During eviction it is quite likely that we move a BO from VRAM to GTT and
>> update the page tables immediately to the new GTT address.
>>
>> Rework the whole function to get all the necessary parameters directly as
>> value.
>>
>> Signed-off-by: Christian König<christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 ++++++++++++++------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 18 ++++----
>>   3 files changed, 48 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9992a7311387..1cac90ee3318 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
>>   }
>>   
>>   /**
>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>> + * amdgpu_vm_update_range - update a range in the vm page table
>>    *
>> - * @adev: amdgpu_device pointer of the VM
>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>> - * @vm: requested vm
>> + * @adev: amdgpu_device pointer to use for commands
>> + * @vm: the VM to update the range
>>    * @immediate: immediate submission in a page fault
>>    * @unlocked: unlocked invalidation during MM callback
>> + * @flush_tlb: trigger tlb invalidation after update completed
>>    * @resv: fences we need to sync to
>>    * @start: start of mapped range
>>    * @last: last mapped entry
>>    * @flags: flags for the entries
>>    * @offset: offset into nodes and pages_addr
>> + * @vram_base: base for vram mappings
>>    * @res: ttm_resource to map
>>    * @pages_addr: DMA addresses to use for mapping
>>    * @fence: optional resulting fence
>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
>>    * Fill in the page table entries between @start and @last.
>>    *
>>    * Returns:
>> - * 0 for success, -EINVAL for failure.
>> + * 0 for success, negative error code for failure.
>>    */
>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> -				struct amdgpu_device *bo_adev,
>> -				struct amdgpu_vm *vm, bool immediate,
>> -				bool unlocked, struct dma_resv *resv,
>> -				uint64_t start, uint64_t last,
>> -				uint64_t flags, uint64_t offset,
>> -				struct ttm_resource *res,
>> -				dma_addr_t *pages_addr,
>> -				struct dma_fence **fence)
>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> +			   bool immediate, bool unlocked, bool flush_tlb,
>> +			   struct dma_resv *resv, uint64_t start, uint64_t last,
>> +			   uint64_t flags, uint64_t offset, uint64_t vram_base,
>> +			   struct ttm_resource *res, dma_addr_t *pages_addr,
>> +			   struct dma_fence **fence)
>>   {
>>   	struct amdgpu_vm_update_params params;
>>   	struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   			}
>>   
>>   		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>> -			addr = bo_adev->vm_manager.vram_base_offset +
>> -				cursor.start;
>> +			addr = vram_base + cursor.start;
>>   		} else {
>>   			addr = 0;
>>   		}
>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   
>>   	r = vm->update_funcs->commit(&params, fence);
>>   
>> -	if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>> +	if (flush_tlb || params.table_freed) {
>
> All change look good to me, but when I look at previous changes again, 
> kfd_ioctl_map_memory_to_gpu is not correct now, as this should flush 
> TLB if (!kfd_flush_tlb_after_unmap(dev)).
>

That was intentionally dropped as Felix said it wouldn't be necessary 
any more with the TLB flush rework.

Is that really causing any trouble?

Christian.

> To fix it, it seems we need the below change, then pass the flush_tlb flag via 
> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte -> 
> amdgpu_vm_bo_update
>
> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
> amdgpu_bo_va *bo_va,
>             bool clear)
>
> -    bool flush_tlb = clear;
>
> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
> amdgpu_bo_va *bo_va,
>             bool clear, bool flush_tlb)
>
> + flush_tlb |= clear;
>
> Regards,
>
> Philip
>
>>   		tlb_cb->vm = vm;
>>   		if (!fence || !*fence ||
>>   		    dma_fence_add_callback(*fence, &tlb_cb->cb,
>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>   	dma_addr_t *pages_addr = NULL;
>>   	struct ttm_resource *mem;
>>   	struct dma_fence **last_update;
>> +	bool flush_tlb = clear;
>>   	struct dma_resv *resv;
>> +	uint64_t vram_base;
>>   	uint64_t flags;
>> -	struct amdgpu_device *bo_adev = adev;
>>   	int r;
>>   
>>   	if (clear || !bo) {
>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>   	}
>>   
>>   	if (bo) {
>> +		struct amdgpu_device *bo_adev = adev;
>> +
>>   		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>   
>>   		if (amdgpu_bo_encrypted(bo))
>>   			flags |= AMDGPU_PTE_TMZ;
>>   
>>   		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> +		vram_base = bo_adev->vm_manager.vram_base_offset;
>>   	} else {
>>   		flags = 0x0;
>> +		vram_base = 0;
>>   	}
>>   
>>   	if (clear || (bo && bo->tbo.base.resv ==
>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>   		last_update = &bo_va->last_pt_update;
>>   
>>   	if (!clear && bo_va->base.moved) {
>> -		bo_va->base.moved = false;
>> +		flush_tlb = true;
>>   		list_splice_init(&bo_va->valids, &bo_va->invalids);
>>   
>>   	} else if (bo_va->cleared != clear) {
>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>   
>>   		trace_amdgpu_vm_bo_update(mapping);
>>   
>> -		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>> -						resv, mapping->start,
>> -						mapping->last, update_flags,
>> -						mapping->offset, mem,
>> -						pages_addr, last_update);
>> +		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>> +					   resv, mapping->start, mapping->last,
>> +					   update_flags, mapping->offset,
>> +					   vram_base, mem, pages_addr,
>> +					   last_update);
>>   		if (r)
>>   			return r;
>>   	}
>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>   
>>   	list_splice_init(&bo_va->invalids, &bo_va->valids);
>>   	bo_va->cleared = clear;
>> +	bo_va->base.moved = false;
>>   
>>   	if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>   		list_for_each_entry(mapping, &bo_va->valids, list)
>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>   		    mapping->start < AMDGPU_GMC_HOLE_START)
>>   			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>   
>> -		r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
>> -						resv, mapping->start,
>> -						mapping->last, init_pte_value,
>> -						0, NULL, NULL, &f);
>> +		r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
>> +					   mapping->start, mapping->last,
>> +					   init_pte_value, 0, 0, NULL, NULL,
>> +					   &f);
>>   		amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>   		if (r) {
>>   			dma_fence_put(f);
>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>>   		goto error_unlock;
>>   	}
>>   
>> -	r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
>> -					addr, flags, value, NULL, NULL, NULL);
>> +	r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
>> +				   addr, flags, value, 0, NULL, NULL, NULL);
>>   	if (r)
>>   		goto error_unlock;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 6b7682fe84f8..1a814fbffff8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>   			   struct amdgpu_vm *vm);
>>   void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>   			    struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>> -				struct amdgpu_device *bo_adev,
>> -				struct amdgpu_vm *vm, bool immediate,
>> -				bool unlocked, struct dma_resv *resv,
>> -				uint64_t start, uint64_t last,
>> -				uint64_t flags, uint64_t offset,
>> -				struct ttm_resource *res,
>> -				dma_addr_t *pages_addr,
>> -				struct dma_fence **fence);
>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> +			   bool immediate, bool unlocked, bool flush_tlb,
>> +			   struct dma_resv *resv, uint64_t start, uint64_t last,
>> +			   uint64_t flags, uint64_t offset, uint64_t vram_base,
>> +			   struct ttm_resource *res, dma_addr_t *pages_addr,
>> +			   struct dma_fence **fence);
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   			struct amdgpu_bo_va *bo_va,
>>   			bool clear);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 27533f6057e0..907b02045824 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   
>>   	pr_debug("[0x%llx 0x%llx]\n", start, last);
>>   
>> -	return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
>> -					   start, last, init_pte_value, 0,
>> -					   NULL, NULL, fence);
>> +	return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
>> +				      last, init_pte_value, 0, 0, NULL, NULL,
>> +				      fence);
>>   }
>>   
>>   static int
>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>>   			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>   			 pte_flags);
>>   
>> -		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
>> -						NULL, last_start,
>> -						prange->start + i, pte_flags,
>> -						last_start - prange->start,
>> -						NULL, dma_addr,
>> -						&vm->last_update);
>> +		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
>> +					   last_start, prange->start + i,
>> +					   pte_flags,
>> +					   last_start - prange->start,
>> +					   bo_adev->vm_manager.vram_base_offset,
>> +					   NULL, dma_addr, &vm->last_update);
>>   
>>   		for (j = last_start - prange->start; j <= i; j++)
>>   			dma_addr[j] |= last_domain;


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-03-31  6:27   ` Christian König
@ 2022-03-31 14:37     ` Felix Kuehling
  2022-03-31 15:42       ` philip yang
                         ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Felix Kuehling @ 2022-03-31 14:37 UTC (permalink / raw)
  To: Christian König, philip yang, amd-gfx, Philip.Yang
  Cc: Christian König

Am 2022-03-31 um 02:27 schrieb Christian König:
> Am 30.03.22 um 22:51 schrieb philip yang:
>>
>>
>> On 2022-03-30 05:00, Christian König wrote:
>>> Testing the valid bit is not enough to figure out if we
>>> need to invalidate the TLB or not.
>>>
>>> During eviction it is quite likely that we move a BO from VRAM to 
>>> GTT and
>>> update the page tables immediately to the new GTT address.
>>>
>>> Rework the whole function to get all the necessary parameters 
>>> directly as
>>> value.
>>>
>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 
>>> ++++++++++++++------------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 18 ++++----
>>>   3 files changed, 48 insertions(+), 48 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 9992a7311387..1cac90ee3318 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>> dma_fence *fence,
>>>   }
>>>     /**
>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>>    *
>>> - * @adev: amdgpu_device pointer of the VM
>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>> - * @vm: requested vm
>>> + * @adev: amdgpu_device pointer to use for commands
>>> + * @vm: the VM to update the range
>>>    * @immediate: immediate submission in a page fault
>>>    * @unlocked: unlocked invalidation during MM callback
>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>>    * @resv: fences we need to sync to
>>>    * @start: start of mapped range
>>>    * @last: last mapped entry
>>>    * @flags: flags for the entries
>>>    * @offset: offset into nodes and pages_addr
>>> + * @vram_base: base for vram mappings
>>>    * @res: ttm_resource to map
>>>    * @pages_addr: DMA addresses to use for mapping
>>>    * @fence: optional resulting fence
>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>> dma_fence *fence,
>>>    * Fill in the page table entries between @start and @last.
>>>    *
>>>    * Returns:
>>> - * 0 for success, -EINVAL for failure.
>>> + * 0 for success, negative error code for failure.
>>>    */
>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>> -                struct amdgpu_device *bo_adev,
>>> -                struct amdgpu_vm *vm, bool immediate,
>>> -                bool unlocked, struct dma_resv *resv,
>>> -                uint64_t start, uint64_t last,
>>> -                uint64_t flags, uint64_t offset,
>>> -                struct ttm_resource *res,
>>> -                dma_addr_t *pages_addr,
>>> -                struct dma_fence **fence)
>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>> amdgpu_vm *vm,
>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>> +               struct dma_resv *resv, uint64_t start, uint64_t last,
>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>> +               struct dma_fence **fence)
>>>   {
>>>       struct amdgpu_vm_update_params params;
>>>       struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>> amdgpu_device *adev,
>>>               }
>>>             } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>> -            addr = bo_adev->vm_manager.vram_base_offset +
>>> -                cursor.start;
>>> +            addr = vram_base + cursor.start;
>>>           } else {
>>>               addr = 0;
>>>           }
>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>> amdgpu_device *adev,
>>>         r = vm->update_funcs->commit(&params, fence);
>>>   -    if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>> +    if (flush_tlb || params.table_freed) {
>>
>> All change look good to me, but when I look at previous changes 
>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this should 
>> flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>
>
> That was intentionally dropped as Felix said it wouldn't be necessary 
> any more with the TLB flush rework.
>
> Is that really causing any trouble?

I discussed it with Philip offline. The TLB flushing in 
kfd_ioctl_map_memory_to_gpu is still there, but it is no longer 
conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb checks 
the sequence number to find out if the flush is needed (presumably 
because we didn't flush after unmap).

There is one case on Vega20+XGMI where PTEs get inadvertently cached in 
L2 texture cache. In that case, we probably need to flush more 
frequently because a cache line in L2 may contain stale invalid PTEs. So 
kfd_flush_tlb would need to ignore the sequence number and heavy-weight 
flush TLB unconditionally in this case.

Regards,
   Felix

>
> Christian.
>
>> To fix it, it seems we need the below change, then pass the flush_tlb flag via 
>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte -> 
>> amdgpu_vm_bo_update
>>
>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>> amdgpu_bo_va *bo_va,
>>             bool clear)
>>
>> -    bool flush_tlb = clear;
>>
>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>> amdgpu_bo_va *bo_va,
>>             bool clear, bool flush_tlb)
>>
>> + flush_tlb |= clear;
>>
>> Regards,
>>
>> Philip
>>
>>>           tlb_cb->vm = vm;
>>>           if (!fence || !*fence ||
>>>               dma_fence_add_callback(*fence, &tlb_cb->cb,
>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>> *adev, struct amdgpu_bo_va *bo_va,
>>>       dma_addr_t *pages_addr = NULL;
>>>       struct ttm_resource *mem;
>>>       struct dma_fence **last_update;
>>> +    bool flush_tlb = clear;
>>>       struct dma_resv *resv;
>>> +    uint64_t vram_base;
>>>       uint64_t flags;
>>> -    struct amdgpu_device *bo_adev = adev;
>>>       int r;
>>>         if (clear || !bo) {
>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>> *adev, struct amdgpu_bo_va *bo_va,
>>>       }
>>>         if (bo) {
>>> +        struct amdgpu_device *bo_adev = adev;
>>> +
>>>           flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>             if (amdgpu_bo_encrypted(bo))
>>>               flags |= AMDGPU_PTE_TMZ;
>>>             bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> +        vram_base = bo_adev->vm_manager.vram_base_offset;
>>>       } else {
>>>           flags = 0x0;
>>> +        vram_base = 0;
>>>       }
>>>         if (clear || (bo && bo->tbo.base.resv ==
>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>> *adev, struct amdgpu_bo_va *bo_va,
>>>           last_update = &bo_va->last_pt_update;
>>>         if (!clear && bo_va->base.moved) {
>>> -        bo_va->base.moved = false;
>>> +        flush_tlb = true;
>>>           list_splice_init(&bo_va->valids, &bo_va->invalids);
>>>         } else if (bo_va->cleared != clear) {
>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>> *adev, struct amdgpu_bo_va *bo_va,
>>>             trace_amdgpu_vm_bo_update(mapping);
>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, 
>>> false,
>>> -                        resv, mapping->start,
>>> -                        mapping->last, update_flags,
>>> -                        mapping->offset, mem,
>>> -                        pages_addr, last_update);
>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>>> +                       resv, mapping->start, mapping->last,
>>> +                       update_flags, mapping->offset,
>>> +                       vram_base, mem, pages_addr,
>>> +                       last_update);
>>>           if (r)
>>>               return r;
>>>       }
>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>> *adev, struct amdgpu_bo_va *bo_va,
>>>         list_splice_init(&bo_va->invalids, &bo_va->valids);
>>>       bo_va->cleared = clear;
>>> +    bo_va->base.moved = false;
>>>         if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>>           list_for_each_entry(mapping, &bo_va->valids, list)
>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct 
>>> amdgpu_device *adev,
>>>               mapping->start < AMDGPU_GMC_HOLE_START)
>>>               init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>   -        r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>> false,
>>> -                        resv, mapping->start,
>>> -                        mapping->last, init_pte_value,
>>> -                        0, NULL, NULL, &f);
>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
>>> +                       mapping->start, mapping->last,
>>> +                       init_pte_value, 0, 0, NULL, NULL,
>>> +                       &f);
>>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>           if (r) {
>>>               dma_fence_put(f);
>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct 
>>> amdgpu_device *adev, u32 pasid,
>>>           goto error_unlock;
>>>       }
>>>   -    r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, 
>>> NULL, addr,
>>> -                    addr, flags, value, NULL, NULL, NULL);
>>> +    r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, 
>>> addr,
>>> +                   addr, flags, value, 0, NULL, NULL, NULL);
>>>       if (r)
>>>           goto error_unlock;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 6b7682fe84f8..1a814fbffff8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct 
>>> amdgpu_device *adev,
>>>                  struct amdgpu_vm *vm);
>>>   void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>                   struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>> -                struct amdgpu_device *bo_adev,
>>> -                struct amdgpu_vm *vm, bool immediate,
>>> -                bool unlocked, struct dma_resv *resv,
>>> -                uint64_t start, uint64_t last,
>>> -                uint64_t flags, uint64_t offset,
>>> -                struct ttm_resource *res,
>>> -                dma_addr_t *pages_addr,
>>> -                struct dma_fence **fence);
>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>> amdgpu_vm *vm,
>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>> +               struct dma_resv *resv, uint64_t start, uint64_t last,
>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>> +               struct dma_fence **fence);
>>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>               struct amdgpu_bo_va *bo_va,
>>>               bool clear);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 27533f6057e0..907b02045824 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm,
>>>         pr_debug("[0x%llx 0x%llx]\n", start, last);
>>>   -    return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>> true, NULL,
>>> -                       start, last, init_pte_value, 0,
>>> -                       NULL, NULL, fence);
>>> +    return amdgpu_vm_update_range(adev, vm, false, true, true, 
>>> NULL, start,
>>> +                      last, init_pte_value, 0, 0, NULL, NULL,
>>> +                      fence);
>>>   }
>>>     static int
>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct 
>>> kfd_process_device *pdd, struct svm_range *prange,
>>>                (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>>                pte_flags);
>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, 
>>> false,
>>> -                        NULL, last_start,
>>> -                        prange->start + i, pte_flags,
>>> -                        last_start - prange->start,
>>> -                        NULL, dma_addr,
>>> -                        &vm->last_update);
>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, false, 
>>> NULL,
>>> +                       last_start, prange->start + i,
>>> +                       pte_flags,
>>> +                       last_start - prange->start,
>>> + bo_adev->vm_manager.vram_base_offset,
>>> +                       NULL, dma_addr, &vm->last_update);
>>>             for (j = last_start - prange->start; j <= i; j++)
>>>               dma_addr[j] |= last_domain;
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-03-31 14:37     ` Felix Kuehling
@ 2022-03-31 15:42       ` philip yang
  2022-03-31 15:46       ` philip yang
  2022-04-01  8:24       ` Christian König
  2 siblings, 0 replies; 9+ messages in thread
From: philip yang @ 2022-03-31 15:42 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, amd-gfx, Philip.Yang
  Cc: Christian König

[-- Attachment #1: Type: text/html, Size: 30125 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-03-31 14:37     ` Felix Kuehling
  2022-03-31 15:42       ` philip yang
@ 2022-03-31 15:46       ` philip yang
  2022-04-01  8:24       ` Christian König
  2 siblings, 0 replies; 9+ messages in thread
From: philip yang @ 2022-03-31 15:46 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, amd-gfx, Philip.Yang
  Cc: Christian König

[-- Attachment #1: Type: text/html, Size: 29695 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-03-31 14:37     ` Felix Kuehling
  2022-03-31 15:42       ` philip yang
  2022-03-31 15:46       ` philip yang
@ 2022-04-01  8:24       ` Christian König
  2022-04-01 19:47         ` Felix Kuehling
  2 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-04-01  8:24 UTC (permalink / raw)
  To: Felix Kuehling, philip yang, amd-gfx, Philip.Yang; +Cc: Christian König

Am 31.03.22 um 16:37 schrieb Felix Kuehling:
> Am 2022-03-31 um 02:27 schrieb Christian König:
>> Am 30.03.22 um 22:51 schrieb philip yang:
>>>
>>>
>>> On 2022-03-30 05:00, Christian König wrote:
>>>> Testing the valid bit is not enough to figure out if we
>>>> need to invalidate the TLB or not.
>>>>
>>>> During eviction it is quite likely that we move a BO from VRAM to 
>>>> GTT and
>>>> update the page tables immediately to the new GTT address.
>>>>
>>>> Rework the whole function to get all the necessary parameters 
>>>> directly as
>>>> value.
>>>>
>>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 
>>>> ++++++++++++++------------
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 18 ++++----
>>>>   3 files changed, 48 insertions(+), 48 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 9992a7311387..1cac90ee3318 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>>> dma_fence *fence,
>>>>   }
>>>>     /**
>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page 
>>>> table
>>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>>>    *
>>>> - * @adev: amdgpu_device pointer of the VM
>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>>> - * @vm: requested vm
>>>> + * @adev: amdgpu_device pointer to use for commands
>>>> + * @vm: the VM to update the range
>>>>    * @immediate: immediate submission in a page fault
>>>>    * @unlocked: unlocked invalidation during MM callback
>>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>>>    * @resv: fences we need to sync to
>>>>    * @start: start of mapped range
>>>>    * @last: last mapped entry
>>>>    * @flags: flags for the entries
>>>>    * @offset: offset into nodes and pages_addr
>>>> + * @vram_base: base for vram mappings
>>>>    * @res: ttm_resource to map
>>>>    * @pages_addr: DMA addresses to use for mapping
>>>>    * @fence: optional resulting fence
>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>>> dma_fence *fence,
>>>>    * Fill in the page table entries between @start and @last.
>>>>    *
>>>>    * Returns:
>>>> - * 0 for success, -EINVAL for failure.
>>>> + * 0 for success, negative error code for failure.
>>>>    */
>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>> -                struct amdgpu_device *bo_adev,
>>>> -                struct amdgpu_vm *vm, bool immediate,
>>>> -                bool unlocked, struct dma_resv *resv,
>>>> -                uint64_t start, uint64_t last,
>>>> -                uint64_t flags, uint64_t offset,
>>>> -                struct ttm_resource *res,
>>>> -                dma_addr_t *pages_addr,
>>>> -                struct dma_fence **fence)
>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>>> amdgpu_vm *vm,
>>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>>> +               struct dma_resv *resv, uint64_t start, uint64_t last,
>>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>>> +               struct dma_fence **fence)
>>>>   {
>>>>       struct amdgpu_vm_update_params params;
>>>>       struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>>> amdgpu_device *adev,
>>>>               }
>>>>             } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>>> -            addr = bo_adev->vm_manager.vram_base_offset +
>>>> -                cursor.start;
>>>> +            addr = vram_base + cursor.start;
>>>>           } else {
>>>>               addr = 0;
>>>>           }
>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>>> amdgpu_device *adev,
>>>>         r = vm->update_funcs->commit(&params, fence);
>>>>   -    if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>>> +    if (flush_tlb || params.table_freed) {
>>>
>>> All change look good to me, but when I look at previous changes 
>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this 
>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>>
>>
>> That was intentionally dropped as Felix said it wouldn't be necessary 
>> any more with the TLB flush rework.
>>
>> Is that really causing any trouble?
>
> I discussed it with Philip offline. The TLB flushing in 
> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer 
> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb 
> checks the sequence number to find out if the flush is needed 
> (presumably because we didn't flush after unmap).
>
> There is one case on Vega20+XGMI where PTEs get inadvertently cached 
> in L2 texture cache.

Ah, that one. Yeah I do remember that issue.

> In that case, we probably need to flush more frequently because a 
> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb 
> would need to ignore the sequence number and heavy-weight flush TLB 
> unconditionally in this case.

Ok, but I think that is outside of the scope of the VM handling then. Or 
should we somehow handle that in the VM code as well?

I mean incrementing the sequence when the involved BO is mapped through 
XGMI is trivial. I just can't easily signal that we need a heavy-weight 
flush.

Thanks,
Christian.

>
> Regards,
>   Felix
>
>>
>> Christian.
>>
>>> To fix it, it seems we need the below change, then pass flush_tlb flag via 
>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte 
>>> -> amdgpu_vm_bo_update
>>>
>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>>> amdgpu_bo_va *bo_va,
>>>             bool clear)
>>>
>>> -    bool flush_tlb = clear;
>>>
>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>>> amdgpu_bo_va *bo_va,
>>>             bool clear, bool flush_tlb)
>>>
>>> + flush_tlb |= clear;
>>>
>>> Regards,
>>>
>>> Philip
>>>
>>>>           tlb_cb->vm = vm;
>>>>           if (!fence || !*fence ||
>>>>               dma_fence_add_callback(*fence, &tlb_cb->cb,
>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>       dma_addr_t *pages_addr = NULL;
>>>>       struct ttm_resource *mem;
>>>>       struct dma_fence **last_update;
>>>> +    bool flush_tlb = clear;
>>>>       struct dma_resv *resv;
>>>> +    uint64_t vram_base;
>>>>       uint64_t flags;
>>>> -    struct amdgpu_device *bo_adev = adev;
>>>>       int r;
>>>>         if (clear || !bo) {
>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct 
>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>       }
>>>>         if (bo) {
>>>> +        struct amdgpu_device *bo_adev = adev;
>>>> +
>>>>           flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>>             if (amdgpu_bo_encrypted(bo))
>>>>               flags |= AMDGPU_PTE_TMZ;
>>>>             bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>> +        vram_base = bo_adev->vm_manager.vram_base_offset;
>>>>       } else {
>>>>           flags = 0x0;
>>>> +        vram_base = 0;
>>>>       }
>>>>         if (clear || (bo && bo->tbo.base.resv ==
>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>           last_update = &bo_va->last_pt_update;
>>>>         if (!clear && bo_va->base.moved) {
>>>> -        bo_va->base.moved = false;
>>>> +        flush_tlb = true;
>>>>           list_splice_init(&bo_va->valids, &bo_va->invalids);
>>>>         } else if (bo_va->cleared != clear) {
>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct 
>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>             trace_amdgpu_vm_bo_update(mapping);
>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, 
>>>> false, false,
>>>> -                        resv, mapping->start,
>>>> -                        mapping->last, update_flags,
>>>> -                        mapping->offset, mem,
>>>> -                        pages_addr, last_update);
>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>>>> +                       resv, mapping->start, mapping->last,
>>>> +                       update_flags, mapping->offset,
>>>> +                       vram_base, mem, pages_addr,
>>>> +                       last_update);
>>>>           if (r)
>>>>               return r;
>>>>       }
>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>         list_splice_init(&bo_va->invalids, &bo_va->valids);
>>>>       bo_va->cleared = clear;
>>>> +    bo_va->base.moved = false;
>>>>         if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>>>           list_for_each_entry(mapping, &bo_va->valids, list)
>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct 
>>>> amdgpu_device *adev,
>>>>               mapping->start < AMDGPU_GMC_HOLE_START)
>>>>               init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>>> false,
>>>> -                        resv, mapping->start,
>>>> -                        mapping->last, init_pte_value,
>>>> -                        0, NULL, NULL, &f);
>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, true, 
>>>> resv,
>>>> +                       mapping->start, mapping->last,
>>>> +                       init_pte_value, 0, 0, NULL, NULL,
>>>> +                       &f);
>>>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>           if (r) {
>>>>               dma_fence_put(f);
>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct 
>>>> amdgpu_device *adev, u32 pasid,
>>>>           goto error_unlock;
>>>>       }
>>>>   -    r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, 
>>>> NULL, addr,
>>>> -                    addr, flags, value, NULL, NULL, NULL);
>>>> +    r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, 
>>>> addr,
>>>> +                   addr, flags, value, 0, NULL, NULL, NULL);
>>>>       if (r)
>>>>           goto error_unlock;
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index 6b7682fe84f8..1a814fbffff8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct 
>>>> amdgpu_device *adev,
>>>>                  struct amdgpu_vm *vm);
>>>>   void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>>                   struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>> -                struct amdgpu_device *bo_adev,
>>>> -                struct amdgpu_vm *vm, bool immediate,
>>>> -                bool unlocked, struct dma_resv *resv,
>>>> -                uint64_t start, uint64_t last,
>>>> -                uint64_t flags, uint64_t offset,
>>>> -                struct ttm_resource *res,
>>>> -                dma_addr_t *pages_addr,
>>>> -                struct dma_fence **fence);
>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>>> amdgpu_vm *vm,
>>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>>> +               struct dma_resv *resv, uint64_t start, uint64_t last,
>>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>>> +               struct dma_fence **fence);
>>>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>               struct amdgpu_bo_va *bo_va,
>>>>               bool clear);
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> index 27533f6057e0..907b02045824 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device 
>>>> *adev, struct amdgpu_vm *vm,
>>>>         pr_debug("[0x%llx 0x%llx]\n", start, last);
>>>>   -    return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>>> true, NULL,
>>>> -                       start, last, init_pte_value, 0,
>>>> -                       NULL, NULL, fence);
>>>> +    return amdgpu_vm_update_range(adev, vm, false, true, true, 
>>>> NULL, start,
>>>> +                      last, init_pte_value, 0, 0, NULL, NULL,
>>>> +                      fence);
>>>>   }
>>>>     static int
>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct 
>>>> kfd_process_device *pdd, struct svm_range *prange,
>>>>                (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>>>                pte_flags);
>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, 
>>>> false, false,
>>>> -                        NULL, last_start,
>>>> -                        prange->start + i, pte_flags,
>>>> -                        last_start - prange->start,
>>>> -                        NULL, dma_addr,
>>>> -                        &vm->last_update);
>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, false, 
>>>> NULL,
>>>> +                       last_start, prange->start + i,
>>>> +                       pte_flags,
>>>> +                       last_start - prange->start,
>>>> + bo_adev->vm_manager.vram_base_offset,
>>>> +                       NULL, dma_addr, &vm->last_update);
>>>>             for (j = last_start - prange->start; j <= i; j++)
>>>>               dma_addr[j] |= last_domain;
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-04-01  8:24       ` Christian König
@ 2022-04-01 19:47         ` Felix Kuehling
  2022-04-03 15:09           ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2022-04-01 19:47 UTC (permalink / raw)
  To: Christian König, philip yang, amd-gfx, Philip.Yang
  Cc: Christian König


On 2022-04-01 04:24, Christian König wrote:
> Am 31.03.22 um 16:37 schrieb Felix Kuehling:
>> Am 2022-03-31 um 02:27 schrieb Christian König:
>>> Am 30.03.22 um 22:51 schrieb philip yang:
>>>>
>>>>
>>>> On 2022-03-30 05:00, Christian König wrote:
>>>>> Testing the valid bit is not enough to figure out if we
>>>>> need to invalidate the TLB or not.
>>>>>
>>>>> During eviction it is quite likely that we move a BO from VRAM to 
>>>>> GTT and
>>>>> update the page tables immediately to the new GTT address.
>>>>>
>>>>> Rework the whole function to get all the necessary parameters 
>>>>> directly as
>>>>> value.
>>>>>
>>>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 
>>>>> ++++++++++++++------------
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 18 ++++----
>>>>>   3 files changed, 48 insertions(+), 48 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 9992a7311387..1cac90ee3318 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>>>> dma_fence *fence,
>>>>>   }
>>>>>     /**
>>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page 
>>>>> table
>>>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>>>>    *
>>>>> - * @adev: amdgpu_device pointer of the VM
>>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>>>> - * @vm: requested vm
>>>>> + * @adev: amdgpu_device pointer to use for commands
>>>>> + * @vm: the VM to update the range
>>>>>    * @immediate: immediate submission in a page fault
>>>>>    * @unlocked: unlocked invalidation during MM callback
>>>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>>>>    * @resv: fences we need to sync to
>>>>>    * @start: start of mapped range
>>>>>    * @last: last mapped entry
>>>>>    * @flags: flags for the entries
>>>>>    * @offset: offset into nodes and pages_addr
>>>>> + * @vram_base: base for vram mappings
>>>>>    * @res: ttm_resource to map
>>>>>    * @pages_addr: DMA addresses to use for mapping
>>>>>    * @fence: optional resulting fence
>>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>>>> dma_fence *fence,
>>>>>    * Fill in the page table entries between @start and @last.
>>>>>    *
>>>>>    * Returns:
>>>>> - * 0 for success, -EINVAL for failure.
>>>>> + * 0 for success, negative error code for failure.
>>>>>    */
>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>> -                struct amdgpu_device *bo_adev,
>>>>> -                struct amdgpu_vm *vm, bool immediate,
>>>>> -                bool unlocked, struct dma_resv *resv,
>>>>> -                uint64_t start, uint64_t last,
>>>>> -                uint64_t flags, uint64_t offset,
>>>>> -                struct ttm_resource *res,
>>>>> -                dma_addr_t *pages_addr,
>>>>> -                struct dma_fence **fence)
>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>>>> amdgpu_vm *vm,
>>>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>>>> +               struct dma_resv *resv, uint64_t start, uint64_t last,
>>>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>> +               struct dma_fence **fence)
>>>>>   {
>>>>>       struct amdgpu_vm_update_params params;
>>>>>       struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>>>> amdgpu_device *adev,
>>>>>               }
>>>>>             } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
>>>>> -            addr = bo_adev->vm_manager.vram_base_offset +
>>>>> -                cursor.start;
>>>>> +            addr = vram_base + cursor.start;
>>>>>           } else {
>>>>>               addr = 0;
>>>>>           }
>>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>>>> amdgpu_device *adev,
>>>>>         r = vm->update_funcs->commit(&params, fence);
>>>>>   -    if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>>>> +    if (flush_tlb || params.table_freed) {
>>>>
>>>> All change look good to me, but when I look at previous changes 
>>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this 
>>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>>>
>>>
>>> That was intentionally dropped as Felix said it wouldn't be 
>>> necessary any more with the TLB flush rework.
>>>
>>> Is that really causing any trouble?
>>
>> I discussed it with Philip offline. The TLB flushing in 
>> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer 
>> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb 
>> checks the sequence number to find out if the flush is needed 
>> (presumably because we didn't flush after unmap).
>>
>> There is one case on Vega20+XGMI where PTEs get inadvertently cached 
>> in L2 texture cache.
>
> Ah, that one. Yeah I do remember that issue.
>
>> In that case, we probably need to flush more frequently because a 
>> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb 
>> would need to ignore the sequence number and heavy-weight flush TLB 
>> unconditionally in this case.
>
> Ok, but I think that is outside of the scope of the VM handling then. 
> Or should we somehow handle that in the VM code as well?

I think it would make sense to apply the workaround in the VM code now. 
You'd need to do this on all mappings on Vega20+XGMI. It doesn't matter 
whether the mapping itself involves XGMI. AIUI, the incorrect caching 
happens for all mappings when the XGMI bridge is physically present.


>
> I mean incrementing the sequence when the involved BO is mapped 
> through XGMI is trivial. I just can't easily signal that we need a 
> heavy-weight flush.

There is already code in gmc_v9_0.c to force heavy-weight flushing, and 
doing an double flush to make sure the TLB is flushed after the L2 
texture cache. grep -A4 "Vega20+XGMI" gmc_v9_0.c for details.

Regards,
   Felix


>
> Thanks,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>>>
>>> Christian.
>>>
>>>> To fix it, it seems we need the below change, then pass flush_tlb flag via 
>>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte 
>>>> -> amdgpu_vm_bo_update
>>>>
>>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>>>> amdgpu_bo_va *bo_va,
>>>>             bool clear)
>>>>
>>>> -    bool flush_tlb = clear;
>>>>
>>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>>>> amdgpu_bo_va *bo_va,
>>>>             bool clear, bool flush_tlb)
>>>>
>>>> + flush_tlb |= clear;
>>>>
>>>> Regards,
>>>>
>>>> Philip
>>>>
>>>>>           tlb_cb->vm = vm;
>>>>>           if (!fence || !*fence ||
>>>>>               dma_fence_add_callback(*fence, &tlb_cb->cb,
>>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct 
>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>       dma_addr_t *pages_addr = NULL;
>>>>>       struct ttm_resource *mem;
>>>>>       struct dma_fence **last_update;
>>>>> +    bool flush_tlb = clear;
>>>>>       struct dma_resv *resv;
>>>>> +    uint64_t vram_base;
>>>>>       uint64_t flags;
>>>>> -    struct amdgpu_device *bo_adev = adev;
>>>>>       int r;
>>>>>         if (clear || !bo) {
>>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct 
>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>       }
>>>>>         if (bo) {
>>>>> +        struct amdgpu_device *bo_adev = adev;
>>>>> +
>>>>>           flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>>>             if (amdgpu_bo_encrypted(bo))
>>>>>               flags |= AMDGPU_PTE_TMZ;
>>>>>             bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +        vram_base = bo_adev->vm_manager.vram_base_offset;
>>>>>       } else {
>>>>>           flags = 0x0;
>>>>> +        vram_base = 0;
>>>>>       }
>>>>>         if (clear || (bo && bo->tbo.base.resv ==
>>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>>           last_update = &bo_va->last_pt_update;
>>>>>         if (!clear && bo_va->base.moved) {
>>>>> -        bo_va->base.moved = false;
>>>>> +        flush_tlb = true;
>>>>>           list_splice_init(&bo_va->valids, &bo_va->invalids);
>>>>>         } else if (bo_va->cleared != clear) {
>>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct 
>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>             trace_amdgpu_vm_bo_update(mapping);
>>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, 
>>>>> false, false,
>>>>> -                        resv, mapping->start,
>>>>> -                        mapping->last, update_flags,
>>>>> -                        mapping->offset, mem,
>>>>> -                        pages_addr, last_update);
>>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, 
>>>>> flush_tlb,
>>>>> +                       resv, mapping->start, mapping->last,
>>>>> +                       update_flags, mapping->offset,
>>>>> +                       vram_base, mem, pages_addr,
>>>>> +                       last_update);
>>>>>           if (r)
>>>>>               return r;
>>>>>       }
>>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>>         list_splice_init(&bo_va->invalids, &bo_va->valids);
>>>>>       bo_va->cleared = clear;
>>>>> +    bo_va->base.moved = false;
>>>>>         if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>>>>           list_for_each_entry(mapping, &bo_va->valids, list)
>>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct 
>>>>> amdgpu_device *adev,
>>>>>               mapping->start < AMDGPU_GMC_HOLE_START)
>>>>>               init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>>>> false,
>>>>> -                        resv, mapping->start,
>>>>> -                        mapping->last, init_pte_value,
>>>>> -                        0, NULL, NULL, &f);
>>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, true, 
>>>>> resv,
>>>>> +                       mapping->start, mapping->last,
>>>>> +                       init_pte_value, 0, 0, NULL, NULL,
>>>>> +                       &f);
>>>>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>>           if (r) {
>>>>>               dma_fence_put(f);
>>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct 
>>>>> amdgpu_device *adev, u32 pasid,
>>>>>           goto error_unlock;
>>>>>       }
>>>>>   -    r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, 
>>>>> false, NULL, addr,
>>>>> -                    addr, flags, value, NULL, NULL, NULL);
>>>>> +    r = amdgpu_vm_update_range(adev, vm, true, false, false, 
>>>>> NULL, addr,
>>>>> +                   addr, flags, value, 0, NULL, NULL, NULL);
>>>>>       if (r)
>>>>>           goto error_unlock;
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index 6b7682fe84f8..1a814fbffff8 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct 
>>>>> amdgpu_device *adev,
>>>>>                  struct amdgpu_vm *vm);
>>>>>   void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>>>                   struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>> -                struct amdgpu_device *bo_adev,
>>>>> -                struct amdgpu_vm *vm, bool immediate,
>>>>> -                bool unlocked, struct dma_resv *resv,
>>>>> -                uint64_t start, uint64_t last,
>>>>> -                uint64_t flags, uint64_t offset,
>>>>> -                struct ttm_resource *res,
>>>>> -                dma_addr_t *pages_addr,
>>>>> -                struct dma_fence **fence);
>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>>>> amdgpu_vm *vm,
>>>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>>>> +               struct dma_resv *resv, uint64_t start, uint64_t last,
>>>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>> +               struct dma_fence **fence);
>>>>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>>               struct amdgpu_bo_va *bo_va,
>>>>>               bool clear);
>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>> index 27533f6057e0..907b02045824 100644
>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>         pr_debug("[0x%llx 0x%llx]\n", start, last);
>>>>>   -    return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>>>> true, NULL,
>>>>> -                       start, last, init_pte_value, 0,
>>>>> -                       NULL, NULL, fence);
>>>>> +    return amdgpu_vm_update_range(adev, vm, false, true, true, 
>>>>> NULL, start,
>>>>> +                      last, init_pte_value, 0, 0, NULL, NULL,
>>>>> +                      fence);
>>>>>   }
>>>>>     static int
>>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct 
>>>>> kfd_process_device *pdd, struct svm_range *prange,
>>>>>                (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>>>>                pte_flags);
>>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, 
>>>>> false, false,
>>>>> -                        NULL, last_start,
>>>>> -                        prange->start + i, pte_flags,
>>>>> -                        last_start - prange->start,
>>>>> -                        NULL, dma_addr,
>>>>> -                        &vm->last_update);
>>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, false, 
>>>>> NULL,
>>>>> +                       last_start, prange->start + i,
>>>>> +                       pte_flags,
>>>>> +                       last_start - prange->start,
>>>>> + bo_adev->vm_manager.vram_base_offset,
>>>>> +                       NULL, dma_addr, &vm->last_update);
>>>>>             for (j = last_start - prange->start; j <= i; j++)
>>>>>               dma_addr[j] |= last_domain;
>>>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: fix TLB flushing during eviction
  2022-04-01 19:47         ` Felix Kuehling
@ 2022-04-03 15:09           ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2022-04-03 15:09 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, philip yang, amd-gfx, Philip.Yang

Am 01.04.22 um 21:47 schrieb Felix Kuehling:
>
> On 2022-04-01 04:24, Christian König wrote:
>> Am 31.03.22 um 16:37 schrieb Felix Kuehling:
>>> Am 2022-03-31 um 02:27 schrieb Christian König:
>>>> Am 30.03.22 um 22:51 schrieb philip yang:
>>>>>
>>>>>
>>>>> On 2022-03-30 05:00, Christian König wrote:
>>>>>> Testing the valid bit is not enough to figure out if we
>>>>>> need to invalidate the TLB or not.
>>>>>>
>>>>>> During eviction it is quite likely that we move a BO from VRAM to 
>>>>>> GTT and
>>>>>> update the page tables immediately to the new GTT address.
>>>>>>
>>>>>> Rework the whole function to get all the necessary parameters 
>>>>>> directly as
>>>>>> value.
>>>>>>
>>>>>> Signed-off-by: Christian König<christian.koenig@amd.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 63 
>>>>>> ++++++++++++++------------
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 +++---
>>>>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 18 ++++----
>>>>>>   3 files changed, 48 insertions(+), 48 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index 9992a7311387..1cac90ee3318 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -793,18 +793,19 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>>>>> dma_fence *fence,
>>>>>>   }
>>>>>>     /**
>>>>>> - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page 
>>>>>> table
>>>>>> + * amdgpu_vm_update_range - update a range in the vm page table
>>>>>>    *
>>>>>> - * @adev: amdgpu_device pointer of the VM
>>>>>> - * @bo_adev: amdgpu_device pointer of the mapped BO
>>>>>> - * @vm: requested vm
>>>>>> + * @adev: amdgpu_device pointer to use for commands
>>>>>> + * @vm: the VM to update the range
>>>>>>    * @immediate: immediate submission in a page fault
>>>>>>    * @unlocked: unlocked invalidation during MM callback
>>>>>> + * @flush_tlb: trigger tlb invalidation after update completed
>>>>>>    * @resv: fences we need to sync to
>>>>>>    * @start: start of mapped range
>>>>>>    * @last: last mapped entry
>>>>>>    * @flags: flags for the entries
>>>>>>    * @offset: offset into nodes and pages_addr
>>>>>> + * @vram_base: base for vram mappings
>>>>>>    * @res: ttm_resource to map
>>>>>>    * @pages_addr: DMA addresses to use for mapping
>>>>>>    * @fence: optional resulting fence
>>>>>> @@ -812,17 +813,14 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>>>>> dma_fence *fence,
>>>>>>    * Fill in the page table entries between @start and @last.
>>>>>>    *
>>>>>>    * Returns:
>>>>>> - * 0 for success, -EINVAL for failure.
>>>>>> + * 0 for success, negative error code for failure.
>>>>>>    */
>>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>>> -                struct amdgpu_device *bo_adev,
>>>>>> -                struct amdgpu_vm *vm, bool immediate,
>>>>>> -                bool unlocked, struct dma_resv *resv,
>>>>>> -                uint64_t start, uint64_t last,
>>>>>> -                uint64_t flags, uint64_t offset,
>>>>>> -                struct ttm_resource *res,
>>>>>> -                dma_addr_t *pages_addr,
>>>>>> -                struct dma_fence **fence)
>>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>>>>> amdgpu_vm *vm,
>>>>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>>>>> +               struct dma_resv *resv, uint64_t start, uint64_t 
>>>>>> last,
>>>>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>>> +               struct dma_fence **fence)
>>>>>>   {
>>>>>>       struct amdgpu_vm_update_params params;
>>>>>>       struct amdgpu_vm_tlb_seq_cb *tlb_cb;
>>>>>> @@ -910,8 +908,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>>>>> amdgpu_device *adev,
>>>>>>               }
>>>>>>             } else if (flags & (AMDGPU_PTE_VALID | 
>>>>>> AMDGPU_PTE_PRT)) {
>>>>>> -            addr = bo_adev->vm_manager.vram_base_offset +
>>>>>> -                cursor.start;
>>>>>> +            addr = vram_base + cursor.start;
>>>>>>           } else {
>>>>>>               addr = 0;
>>>>>>           }
>>>>>> @@ -927,7 +924,7 @@ int amdgpu_vm_bo_update_mapping(struct 
>>>>>> amdgpu_device *adev,
>>>>>>         r = vm->update_funcs->commit(&params, fence);
>>>>>>   -    if (!(flags & AMDGPU_PTE_VALID) || params.table_freed) {
>>>>>> +    if (flush_tlb || params.table_freed) {
>>>>>
>>>>> All change look good to me, but when I look at previous changes 
>>>>> again, kfd_ioctl_map_memory_to_gpu is not correct now, as this 
>>>>> should flush TLB if (!kfd_flush_tlb_after_unmap(dev)).
>>>>>
>>>>
>>>> That was intentionally dropped as Felix said it wouldn't be 
>>>> necessary any more with the TLB flush rework.
>>>>
>>>> Is that really causing any trouble?
>>>
>>> I discussed it with Philip offline. The TLB flushing in 
>>> kfd_ioctl_map_memory_to_gpu is still there, but it is no longer 
>>> conditional on !kfd_flush_tlb_after_unmap. Instead kfd_flush_tlb 
>>> checks the sequence number to find out if the flush is needed 
>>> (presumably because we didn't flush after unmap).
>>>
>>> There is one case on Vega20+XGMI where PTEs get inadvertently cached 
>>> in L2 texture cache.
>>
>> Ah, that one. Yeah I do remember that issue.
>>
>>> In that case, we probably need to flush more frequently because a 
>>> cache line in L2 may contain stale invalid PTEs. So kfd_flush_tlb 
>>> would need to ignore the sequence number and heavy-weight flush TLB 
>>> unconditionally in this case.
>>
>> Ok, but I think that is outside of the scope of the VM handling then. 
>> Or should we somehow handle that in the VM code as well?
>
> I think it would make sense to apply the workaround in the VM code 
> now. You'd need to do this on all mappings on Vega20+XGMI. It doesn't 
> matter whether the mapping itself involves XGMI. AIUI, the incorrect 
> caching happens for all mappings when the XGMI bridge is physically 
> present.

Good point! Looks like Philip already sent a patch for this, going to 
review that one then.

Thanks,
Christian.

>
>
>>
>> I mean incrementing the sequence when the involved BO is mapped 
>> through XGMI is trivial. I just can't easily signal that we need a 
>> heavy-weight flush.
>
> There is already code in gmc_v9_0.c to force heavy-weight flushing, 
> and doing a double flush to make sure the TLB is flushed after the L2 
> texture cache. grep -A4 "Vega20+XGMI" gmc_v9_0.c for details.
>
> Regards,
>   Felix
>
>
>>
>> Thanks,
>> Christian.
>>
>>>
>>> Regards,
>>>   Felix
>>>
>>>>
>>>> Christian.
>>>>
>>>>>> To fix it, it seems we need the below change, then pass flush_tlb flag via 
>>>>> kfd_ioctl_map_memory_to_gpu -> map_bo_to_gpuvm -> update_gpuvm_pte 
>>>>> -> amdgpu_vm_bo_update
>>>>>
>>>>> -int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>>>>> amdgpu_bo_va *bo_va,
>>>>>             bool clear)
>>>>>
>>>>> -    bool flush_tlb = clear;
>>>>>
>>>>> +int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct 
>>>>> amdgpu_bo_va *bo_va,
>>>>>             bool clear, bool flush_tlb)
>>>>>
>>>>> + flush_tlb |= clear;
>>>>>
>>>>> Regards,
>>>>>
>>>>> Philip
>>>>>
>>>>>>           tlb_cb->vm = vm;
>>>>>>           if (!fence || !*fence ||
>>>>>>               dma_fence_add_callback(*fence, &tlb_cb->cb,
>>>>>> @@ -1010,9 +1007,10 @@ int amdgpu_vm_bo_update(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>>       dma_addr_t *pages_addr = NULL;
>>>>>>       struct ttm_resource *mem;
>>>>>>       struct dma_fence **last_update;
>>>>>> +    bool flush_tlb = clear;
>>>>>>       struct dma_resv *resv;
>>>>>> +    uint64_t vram_base;
>>>>>>       uint64_t flags;
>>>>>> -    struct amdgpu_device *bo_adev = adev;
>>>>>>       int r;
>>>>>>         if (clear || !bo) {
>>>>>> @@ -1037,14 +1035,18 @@ int amdgpu_vm_bo_update(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>>       }
>>>>>>         if (bo) {
>>>>>> +        struct amdgpu_device *bo_adev = adev;
>>>>>> +
>>>>>>           flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
>>>>>>             if (amdgpu_bo_encrypted(bo))
>>>>>>               flags |= AMDGPU_PTE_TMZ;
>>>>>>             bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>>> +        vram_base = bo_adev->vm_manager.vram_base_offset;
>>>>>>       } else {
>>>>>>           flags = 0x0;
>>>>>> +        vram_base = 0;
>>>>>>       }
>>>>>>         if (clear || (bo && bo->tbo.base.resv ==
>>>>>> @@ -1054,7 +1056,7 @@ int amdgpu_vm_bo_update(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>>           last_update = &bo_va->last_pt_update;
>>>>>>         if (!clear && bo_va->base.moved) {
>>>>>> -        bo_va->base.moved = false;
>>>>>> +        flush_tlb = true;
>>>>>>           list_splice_init(&bo_va->valids, &bo_va->invalids);
>>>>>>         } else if (bo_va->cleared != clear) {
>>>>>> @@ -1077,11 +1079,11 @@ int amdgpu_vm_bo_update(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>>             trace_amdgpu_vm_bo_update(mapping);
>>>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, 
>>>>>> false, false,
>>>>>> -                        resv, mapping->start,
>>>>>> -                        mapping->last, update_flags,
>>>>>> -                        mapping->offset, mem,
>>>>>> -                        pages_addr, last_update);
>>>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, 
>>>>>> flush_tlb,
>>>>>> +                       resv, mapping->start, mapping->last,
>>>>>> +                       update_flags, mapping->offset,
>>>>>> +                       vram_base, mem, pages_addr,
>>>>>> +                       last_update);
>>>>>>           if (r)
>>>>>>               return r;
>>>>>>       }
>>>>>> @@ -1104,6 +1106,7 @@ int amdgpu_vm_bo_update(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>>>>         list_splice_init(&bo_va->invalids, &bo_va->valids);
>>>>>>       bo_va->cleared = clear;
>>>>>> +    bo_va->base.moved = false;
>>>>>>         if (trace_amdgpu_vm_bo_mapping_enabled()) {
>>>>>>           list_for_each_entry(mapping, &bo_va->valids, list)
>>>>>> @@ -1272,10 +1275,10 @@ int amdgpu_vm_clear_freed(struct 
>>>>>> amdgpu_device *adev,
>>>>>>               mapping->start < AMDGPU_GMC_HOLE_START)
>>>>>>               init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
>>>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>>>>> false,
>>>>>> -                        resv, mapping->start,
>>>>>> -                        mapping->last, init_pte_value,
>>>>>> -                        0, NULL, NULL, &f);
>>>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, true, 
>>>>>> resv,
>>>>>> +                       mapping->start, mapping->last,
>>>>>> +                       init_pte_value, 0, 0, NULL, NULL,
>>>>>> +                       &f);
>>>>>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>>>           if (r) {
>>>>>>               dma_fence_put(f);
>>>>>> @@ -2519,8 +2522,8 @@ bool amdgpu_vm_handle_fault(struct 
>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>>           goto error_unlock;
>>>>>>       }
>>>>>>   -    r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, 
>>>>>> false, NULL, addr,
>>>>>> -                    addr, flags, value, NULL, NULL, NULL);
>>>>>> +    r = amdgpu_vm_update_range(adev, vm, true, false, false, 
>>>>>> NULL, addr,
>>>>>> +                   addr, flags, value, 0, NULL, NULL, NULL);
>>>>>>       if (r)
>>>>>>           goto error_unlock;
>>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> index 6b7682fe84f8..1a814fbffff8 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> @@ -402,15 +402,12 @@ int amdgpu_vm_handle_moved(struct 
>>>>>> amdgpu_device *adev,
>>>>>>                  struct amdgpu_vm *vm);
>>>>>>   void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>>>>                   struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>>>>> -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>>>> -                struct amdgpu_device *bo_adev,
>>>>>> -                struct amdgpu_vm *vm, bool immediate,
>>>>>> -                bool unlocked, struct dma_resv *resv,
>>>>>> -                uint64_t start, uint64_t last,
>>>>>> -                uint64_t flags, uint64_t offset,
>>>>>> -                struct ttm_resource *res,
>>>>>> -                dma_addr_t *pages_addr,
>>>>>> -                struct dma_fence **fence);
>>>>>> +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct 
>>>>>> amdgpu_vm *vm,
>>>>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>>>>> +               struct dma_resv *resv, uint64_t start, uint64_t 
>>>>>> last,
>>>>>> +               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>>>> +               struct ttm_resource *res, dma_addr_t *pages_addr,
>>>>>> +               struct dma_fence **fence);
>>>>>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>>>               struct amdgpu_bo_va *bo_va,
>>>>>>               bool clear);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> index 27533f6057e0..907b02045824 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>         pr_debug("[0x%llx 0x%llx]\n", start, last);
>>>>>>   -    return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, 
>>>>>> true, NULL,
>>>>>> -                       start, last, init_pte_value, 0,
>>>>>> -                       NULL, NULL, fence);
>>>>>> +    return amdgpu_vm_update_range(adev, vm, false, true, true, 
>>>>>> NULL, start,
>>>>>> +                      last, init_pte_value, 0, 0, NULL, NULL,
>>>>>> +                      fence);
>>>>>>   }
>>>>>>     static int
>>>>>> @@ -1277,12 +1277,12 @@ svm_range_map_to_gpu(struct 
>>>>>> kfd_process_device *pdd, struct svm_range *prange,
>>>>>>                (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>>>>>                pte_flags);
>>>>>>   -        r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, 
>>>>>> false, false,
>>>>>> -                        NULL, last_start,
>>>>>> -                        prange->start + i, pte_flags,
>>>>>> -                        last_start - prange->start,
>>>>>> -                        NULL, dma_addr,
>>>>>> -                        &vm->last_update);
>>>>>> +        r = amdgpu_vm_update_range(adev, vm, false, false, 
>>>>>> false, NULL,
>>>>>> +                       last_start, prange->start + i,
>>>>>> +                       pte_flags,
>>>>>> +                       last_start - prange->start,
>>>>>> + bo_adev->vm_manager.vram_base_offset,
>>>>>> +                       NULL, dma_addr, &vm->last_update);
>>>>>>             for (j = last_start - prange->start; j <= i; j++)
>>>>>>               dma_addr[j] |= last_domain;
>>>>
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-04-03 15:09 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-30  9:00 [PATCH] drm/amdgpu: fix TLB flushing during eviction Christian König
2022-03-30 20:51 ` philip yang
2022-03-31  6:27   ` Christian König
2022-03-31 14:37     ` Felix Kuehling
2022-03-31 15:42       ` philip yang
2022-03-31 15:46       ` philip yang
2022-04-01  8:24       ` Christian König
2022-04-01 19:47         ` Felix Kuehling
2022-04-03 15:09           ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.