All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed
@ 2021-05-29 22:51 Eric Huang
  2021-05-29 22:51 ` [PATCH 2/2] drm/amdkfd: optimize memory mapping latency Eric Huang
  2021-05-30 16:54 ` [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed Christian König
  0 siblings, 2 replies; 9+ messages in thread
From: Eric Huang @ 2021-05-29 22:51 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

table_freed will always be true when mapping a memory region with a
size bigger than 2MB. The problem is that the page table's entries
always exist, but an existing mapping depends on the page table's bo,
so checking whether the page table's bo exists will resolve the issue.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0dee2e8797c7..95b94c95adac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1582,9 +1582,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 			 * completely covered by the range and so potentially still in use.
 			 */
 			while (cursor.pfn < frag_start) {
+				/* Make sure previous mapping existed */
+				if (cursor.entry->base.bo)
+					params->table_freed = true;
 				amdgpu_vm_free_pts(adev, params->vm, &cursor);
 				amdgpu_vm_pt_next(adev, &cursor);
-				params->table_freed = true;
 			}
 
 		} else if (frag >= shift) {
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] drm/amdkfd: optimize memory mapping latency
  2021-05-29 22:51 [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed Eric Huang
@ 2021-05-29 22:51 ` Eric Huang
  2021-06-01 16:05   ` Felix Kuehling
  2021-05-30 16:54 ` [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed Christian König
  1 sibling, 1 reply; 9+ messages in thread
From: Eric Huang @ 2021-05-29 22:51 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

1. conditionally flush TLBs after map.
2. add heavy weight TLBs flushing after unmap.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 24 +++++++++++--------
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  6 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  4 ++--
 8 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2560977760b3..997258c24ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -85,6 +85,7 @@ struct kgd_mem {
 
 	bool aql_queue;
 	bool is_imported;
+	bool table_freed;
 };
 
 /* KFD Memory Eviction */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 385c33675227..8ac0d849fd3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1132,6 +1132,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 		return ret;
 	}
 
+	mem->table_freed = bo_va->table_freed;
+
 	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 36e7f088d4ee..0e0f27f779cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -87,6 +87,7 @@ struct amdgpu_bo_va {
 	bool				cleared;
 
 	bool				is_xgmi;
+	bool				table_freed;
 };
 
 struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 95b94c95adac..ff3eb8395017 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1885,7 +1885,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 						resv, mapping->start,
 						mapping->last, update_flags,
 						mapping->offset, mem,
-						pages_addr, last_update, NULL,
+						pages_addr, last_update, &bo_va->table_freed,
 						vram_base_offset);
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..c45ccd1d03c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1658,16 +1658,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 	}
 
 	/* Flush TLBs after waiting for the page table updates to complete */
-	for (i = 0; i < args->n_devices; i++) {
-		peer = kfd_device_by_id(devices_arr[i]);
-		if (WARN_ON_ONCE(!peer))
-			continue;
-		peer_pdd = kfd_get_process_device_data(peer, p);
-		if (WARN_ON_ONCE(!peer_pdd))
-			continue;
-		if (!amdgpu_read_lock(peer->ddev, true)) {
-			kfd_flush_tlb(peer_pdd);
-			amdgpu_read_unlock(peer->ddev);
+	if (((struct kgd_mem *)mem)->table_freed) {
+		for (i = 0; i < args->n_devices; i++) {
+			peer = kfd_device_by_id(devices_arr[i]);
+			if (WARN_ON_ONCE(!peer))
+				continue;
+			peer_pdd = kfd_get_process_device_data(peer, p);
+			if (WARN_ON_ONCE(!peer_pdd))
+				continue;
+			if (!amdgpu_read_lock(peer->ddev, true)) {
+				kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+				amdgpu_read_unlock(peer->ddev);
+			}
 		}
 	}
 
@@ -1766,6 +1768,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 			amdgpu_read_unlock(peer->ddev);
 			goto unmap_memory_from_gpu_failed;
 		}
+		((struct kgd_mem *)mem)->table_freed = false;
+		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 		amdgpu_read_unlock(peer->ddev);
 		args->n_success = i+1;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c1bea1f7627b..a4920bc5cfbc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 			qpd->vmid,
 			qpd->page_table_base);
 	/* invalidate the VM context after pasid and vmid mapping is set up */
-	kfd_flush_tlb(qpd_to_pdd(qpd));
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 
 	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
 		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
@@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 		if (flush_texture_cache_nocpsch(q->device, qpd))
 			pr_err("Failed to flush TC\n");
 
-	kfd_flush_tlb(qpd_to_pdd(qpd));
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 				dqm->dev->kgd,
 				qpd->vmid,
 				qpd->page_table_base);
-		kfd_flush_tlb(pdd);
+		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 	}
 
 	/* Take a safe reference to the mm_struct, which may otherwise
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ecdd5e782b81..edce3ecf207d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd);
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4ab9da288f90..a03373743a3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -2161,7 +2161,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd)
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
 {
 	struct kfd_dev *dev = pdd->dev;
 
@@ -2174,7 +2174,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
 							pdd->qpd.vmid);
 	} else {
 		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
-					pdd->process->pasid, TLB_FLUSH_LEGACY);
+					pdd->process->pasid, type);
 	}
 }
 
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed
  2021-05-29 22:51 [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed Eric Huang
  2021-05-29 22:51 ` [PATCH 2/2] drm/amdkfd: optimize memory mapping latency Eric Huang
@ 2021-05-30 16:54 ` Christian König
  2021-05-30 18:29   ` Eric Huang
  1 sibling, 1 reply; 9+ messages in thread
From: Christian König @ 2021-05-30 16:54 UTC (permalink / raw)
  To: Eric Huang, amd-gfx



Am 30.05.21 um 00:51 schrieb Eric Huang:
> table_freed will be always true when mapping a memory with size
> bigger than 2MB. The problem is page table's entries are always
> existed, but existing mapping depends on page talbe's bo, so
> using a check of page table's bo existed will resolve the issue.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
>   1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0dee2e8797c7..95b94c95adac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1582,9 +1582,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>   			 * completely covered by the range and so potentially still in use.
>   			 */
>   			while (cursor.pfn < frag_start) {
> +				/* Make sure previous mapping existed */
> +				if (cursor.entry->base.bo)
> +					params->table_freed = true;

In general this is the correct approach, but I would push that decision 
into the amdgpu_vm_free_pts() function.

>   				amdgpu_vm_free_pts(adev, params->vm, &cursor);

So that we have here something like

params->table_freed |= amdgpu_vm_free_pts(..);


Regards,
Christian.

>   				amdgpu_vm_pt_next(adev, &cursor);
> -				params->table_freed = true;
>   			}
>   
>   		} else if (frag >= shift) {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed
  2021-05-30 16:54 ` [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed Christian König
@ 2021-05-30 18:29   ` Eric Huang
  2021-05-31 14:08     ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Huang @ 2021-05-30 18:29 UTC (permalink / raw)
  To: Christian König, amd-gfx


On 2021-05-30 12:54 p.m., Christian König wrote:
>
>
> Am 30.05.21 um 00:51 schrieb Eric Huang:
>> table_freed will be always true when mapping a memory with size
>> bigger than 2MB. The problem is page table's entries are always
>> existed, but existing mapping depends on page talbe's bo, so
>> using a check of page table's bo existed will resolve the issue.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 0dee2e8797c7..95b94c95adac 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1582,9 +1582,11 @@ static int amdgpu_vm_update_ptes(struct 
>> amdgpu_vm_update_params *params,
>>                * completely covered by the range and so potentially 
>> still in use.
>>                */
>>               while (cursor.pfn < frag_start) {
>> +                /* Make sure previous mapping existed */
>> +                if (cursor.entry->base.bo)
>> +                    params->table_freed = true;
>
> In general this is the correct approach, but I would push that 
> decision into the amdgpu_vm_free_pts() function.
>
>>                   amdgpu_vm_free_pts(adev, params->vm, &cursor);
>
> So that we have here something like
>
> params->table_freed |= amdgpu_vm_free_pts(..);
>
Thank you for your review. I have been thinking of putting the check into the
function amdgpu_vm_free_pts() since the previous review, but it would change
the returns of two functions, amdgpu_vm_free_pts() and amdgpu_vm_free_table().
If the returns are not used by other functions, it seems to make a simple
change complex, from my perspective. Can you share the reason for your suggestion?

Regards,
Eric
>
> Regards,
> Christian.
>
>>                   amdgpu_vm_pt_next(adev, &cursor);
>> -                params->table_freed = true;
>>               }
>>             } else if (frag >= shift) {
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed
  2021-05-30 18:29   ` Eric Huang
@ 2021-05-31 14:08     ` Christian König
  2021-05-31 14:30       ` Eric Huang
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2021-05-31 14:08 UTC (permalink / raw)
  To: Eric Huang, amd-gfx



Am 30.05.21 um 20:29 schrieb Eric Huang:
>
> On 2021-05-30 12:54 p.m., Christian König wrote:
>>
>>
>> Am 30.05.21 um 00:51 schrieb Eric Huang:
>>> table_freed will be always true when mapping a memory with size
>>> bigger than 2MB. The problem is page table's entries are always
>>> existed, but existing mapping depends on page talbe's bo, so
>>> using a check of page table's bo existed will resolve the issue.
>>>
>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
>>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 0dee2e8797c7..95b94c95adac 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1582,9 +1582,11 @@ static int amdgpu_vm_update_ptes(struct 
>>> amdgpu_vm_update_params *params,
>>>                * completely covered by the range and so potentially 
>>> still in use.
>>>                */
>>>               while (cursor.pfn < frag_start) {
>>> +                /* Make sure previous mapping existed */
>>> +                if (cursor.entry->base.bo)
>>> +                    params->table_freed = true;
>>
>> In general this is the correct approach, but I would push that 
>> decision into the amdgpu_vm_free_pts() function.
>>
>>> amdgpu_vm_free_pts(adev, params->vm, &cursor);
>>
>> So that we have here something like
>>
>> params->table_freed |= amdgpu_vm_free_pts(..);
>>
> Thank you for your review. I was thinking put the check into function 
> amdgpu_vm_free_pts() since previous review, it will change returns of 
> two functions amdgpu_vm_free_pts() and amdgpu_vm_free_table(). If the 
> returns are not used by other functions, it seems make a simple change 
> complex from my perspective. Can you share the reason of your suggestion?

Because you can also optimize the bulk_moveable handling in that function.

E.g. bulk_moveable should only be set to false when a table was freed.

The only case where this doesn't matter is vm_fini and we really don't 
care for that special one.

Regards,
Christian.

>
> Regards,
> Eric
>>
>> Regards,
>> Christian.
>>
>>> amdgpu_vm_pt_next(adev, &cursor);
>>> -                params->table_freed = true;
>>>               }
>>>             } else if (frag >= shift) {
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed
  2021-05-31 14:08     ` Christian König
@ 2021-05-31 14:30       ` Eric Huang
  2021-05-31 19:37         ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Huang @ 2021-05-31 14:30 UTC (permalink / raw)
  To: Christian König, amd-gfx

On 2021-05-31 10:08 a.m., Christian König wrote:
>
>
> Am 30.05.21 um 20:29 schrieb Eric Huang:
>>
>> On 2021-05-30 12:54 p.m., Christian König wrote:
>>>
>>>
>>> Am 30.05.21 um 00:51 schrieb Eric Huang:
>>>> table_freed will be always true when mapping a memory with size
>>>> bigger than 2MB. The problem is page table's entries are always
>>>> existed, but existing mapping depends on page talbe's bo, so
>>>> using a check of page table's bo existed will resolve the issue.
>>>>
>>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
>>>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 0dee2e8797c7..95b94c95adac 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1582,9 +1582,11 @@ static int amdgpu_vm_update_ptes(struct 
>>>> amdgpu_vm_update_params *params,
>>>>                * completely covered by the range and so potentially 
>>>> still in use.
>>>>                */
>>>>               while (cursor.pfn < frag_start) {
>>>> +                /* Make sure previous mapping existed */
>>>> +                if (cursor.entry->base.bo)
>>>> +                    params->table_freed = true;
>>>
>>> In general this is the correct approach, but I would push that 
>>> decision into the amdgpu_vm_free_pts() function.
>>>
>>>> amdgpu_vm_free_pts(adev, params->vm, &cursor);
>>>
>>> So that we have here something like
>>>
>>> params->table_freed |= amdgpu_vm_free_pts(..);
>>>
>> Thank you for your review. I was thinking put the check into function 
>> amdgpu_vm_free_pts() since previous review, it will change returns of 
>> two functions amdgpu_vm_free_pts() and amdgpu_vm_free_table(). If the 
>> returns are not used by other functions, it seems make a simple 
>> change complex from my perspective. Can you share the reason of your 
>> suggestion?
>
> Because you can also optimize the bulk_moveable handling in that 
> function.
>
> E.g. bulk_moveable should only be set to false when a table was freed.
Makes sense. In terms of bulk_moveable, how about this:

             while (cursor.pfn < frag_start) {
                 /* Make sure previous mapping existed */
                 if (cursor.entry->base.bo) {
                     params->table_freed = true;
                     amdgpu_vm_free_pts(adev, params->vm, &cursor);
                 }
                 amdgpu_vm_pt_next(adev, &cursor);
             }

It should satisfy the bulk_moveable flag, and also avoid freeing
cursor.entry->entries, which are pre-allocated at the beginning of this
function (amdgpu_vm_update_ptes); the pre-allocation can be reused if the
next mapping is a small page as well. It seems the most efficient approach
to me.

Regards,
Eric
>
> The only case where this doesn't matter is vm_fini and we really don't 
> care for that special one.
>
> Regards,
> Christian.
>
>>
>> Regards,
>> Eric
>>>
>>> Regards,
>>> Christian.
>>>
>>>> amdgpu_vm_pt_next(adev, &cursor);
>>>> -                params->table_freed = true;
>>>>               }
>>>>             } else if (frag >= shift) {
>>>
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed
  2021-05-31 14:30       ` Eric Huang
@ 2021-05-31 19:37         ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2021-05-31 19:37 UTC (permalink / raw)
  To: Eric Huang, amd-gfx



Am 31.05.21 um 16:30 schrieb Eric Huang:
> On 2021-05-31 10:08 a.m., Christian König wrote:
>>
>>
>> Am 30.05.21 um 20:29 schrieb Eric Huang:
>>>
>>> On 2021-05-30 12:54 p.m., Christian König wrote:
>>>>
>>>>
>>>> Am 30.05.21 um 00:51 schrieb Eric Huang:
>>>>> table_freed will be always true when mapping a memory with size
>>>>> bigger than 2MB. The problem is page table's entries are always
>>>>> existed, but existing mapping depends on page talbe's bo, so
>>>>> using a check of page table's bo existed will resolve the issue.
>>>>>
>>>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
>>>>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 0dee2e8797c7..95b94c95adac 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -1582,9 +1582,11 @@ static int amdgpu_vm_update_ptes(struct 
>>>>> amdgpu_vm_update_params *params,
>>>>>                * completely covered by the range and so 
>>>>> potentially still in use.
>>>>>                */
>>>>>               while (cursor.pfn < frag_start) {
>>>>> +                /* Make sure previous mapping existed */
>>>>> +                if (cursor.entry->base.bo)
>>>>> +                    params->table_freed = true;
>>>>
>>>> In general this is the correct approach, but I would push that 
>>>> decision into the amdgpu_vm_free_pts() function.
>>>>
>>>>> amdgpu_vm_free_pts(adev, params->vm, &cursor);
>>>>
>>>> So that we have here something like
>>>>
>>>> params->table_freed |= amdgpu_vm_free_pts(..);
>>>>
>>> Thank you for your review. I was thinking put the check into 
>>> function amdgpu_vm_free_pts() since previous review, it will change 
>>> returns of two functions amdgpu_vm_free_pts() and 
>>> amdgpu_vm_free_table(). If the returns are not used by other 
>>> functions, it seems make a simple change complex from my 
>>> perspective. Can you share the reason of your suggestion?
>>
>> Because you can also optimize the bulk_moveable handling in that 
>> function.
>>
>> E.g. bulk_moveable should only be set to false when a table was freed.
> Makes sense. In terms of bulk_moveable, how about this:
>
>             while (cursor.pfn < frag_start) {
>                 /* Make sure previous mapping existed */
>                 if (cursor.entry->base.bo) {
>                     params->table_freed = true;
>                     amdgpu_vm_free_pts(adev, params->vm, &cursor);
>                 }
>                 amdgpu_vm_pt_next(adev, &cursor);
>             }
>
> It should satisfy bulk_moveable flag, and also save freeing 
> cursor.entry->entries, which are pre-allocated in the beginning of 
> this function(amdgpu_vm_update_ptes), the pre-allocation can be saved 
> if next mapping is a small page as well. It seems the most efficient 
> approach for me.

Works for me as well, I would just update the comment to read something 
like "Make sure previous mapping is freed".

Christian.

>
> Regards,
> Eric
>>
>> The only case where this doesn't matter is vm_fini and we really 
>> don't care for that special one.
>>
>> Regards,
>> Christian.
>>
>>>
>>> Regards,
>>> Eric
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> amdgpu_vm_pt_next(adev, &cursor);
>>>>> -                params->table_freed = true;
>>>>>               }
>>>>>             } else if (frag >= shift) {
>>>>
>>>
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: optimize memory mapping latency
  2021-05-29 22:51 ` [PATCH 2/2] drm/amdkfd: optimize memory mapping latency Eric Huang
@ 2021-06-01 16:05   ` Felix Kuehling
  2021-06-01 16:16     ` Felix Kuehling
  0 siblings, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2021-06-01 16:05 UTC (permalink / raw)
  To: Eric Huang, amd-gfx


Am 2021-05-29 um 6:51 p.m. schrieb Eric Huang:
> 1. conditionally flush TLBs after map.
> 2. add heavy weight TLBs flushing after unmap.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 24 +++++++++++--------
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  6 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  4 ++--
>  8 files changed, 25 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 2560977760b3..997258c24ef2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -85,6 +85,7 @@ struct kgd_mem {
>  
>  	bool aql_queue;
>  	bool is_imported;
> +	bool table_freed;
>  };
>  
>  /* KFD Memory Eviction */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 385c33675227..8ac0d849fd3f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1132,6 +1132,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
>  		return ret;
>  	}
>  
> +	mem->table_freed = bo_va->table_freed;
> +

I think this should be

    mem->table_freed = mem->table_freed || bo_va->table_freed;

That way, on a multi-GPU system, mem->table_freed gets set to true if
any GPU freed a page table. Then somewhere, this needs to be reset to false.

However, that means, if one GPU frees a page table, all GPUs need to
flush, which may be unnecessary. A better alternative would be to do the
TLB flushing right here, only for the affected GPU, instead of returning
an aggregated "table_freed" all the way back to
kfd_ioctl_map_memory_to_gpu, which flushes all GPUs.

Finally, bo_va->table_freed is only used once, after the
amdgpu_vm_bo_update call returns. So there is no good reason to store
this permanently in the bo_va structure. It would be better to just add
an output parameter to amdgpu_vm_bo_update.

Regards,
  Felix


>  	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 36e7f088d4ee..0e0f27f779cd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -87,6 +87,7 @@ struct amdgpu_bo_va {
>  	bool				cleared;
>  
>  	bool				is_xgmi;
> +	bool				table_freed;
>  };
>  
>  struct amdgpu_bo {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 95b94c95adac..ff3eb8395017 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1885,7 +1885,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>  						resv, mapping->start,
>  						mapping->last, update_flags,
>  						mapping->offset, mem,
> -						pages_addr, last_update, NULL,
> +						pages_addr, last_update, &bo_va->table_freed,
>  						vram_base_offset);
>  		if (r)
>  			return r;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 960913a35ee4..c45ccd1d03c0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1658,16 +1658,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  	}
>  
>  	/* Flush TLBs after waiting for the page table updates to complete */
> -	for (i = 0; i < args->n_devices; i++) {
> -		peer = kfd_device_by_id(devices_arr[i]);
> -		if (WARN_ON_ONCE(!peer))
> -			continue;
> -		peer_pdd = kfd_get_process_device_data(peer, p);
> -		if (WARN_ON_ONCE(!peer_pdd))
> -			continue;
> -		if (!amdgpu_read_lock(peer->ddev, true)) {
> -			kfd_flush_tlb(peer_pdd);
> -			amdgpu_read_unlock(peer->ddev);
> +	if (((struct kgd_mem *)mem)->table_freed) {
> +		for (i = 0; i < args->n_devices; i++) {
> +			peer = kfd_device_by_id(devices_arr[i]);
> +			if (WARN_ON_ONCE(!peer))
> +				continue;
> +			peer_pdd = kfd_get_process_device_data(peer, p);
> +			if (WARN_ON_ONCE(!peer_pdd))
> +				continue;
> +			if (!amdgpu_read_lock(peer->ddev, true)) {
> +				kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
> +				amdgpu_read_unlock(peer->ddev);
> +			}
>  		}
>  	}
>  
> @@ -1766,6 +1768,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  			amdgpu_read_unlock(peer->ddev);
>  			goto unmap_memory_from_gpu_failed;
>  		}
> +		((struct kgd_mem *)mem)->table_freed = false;
> +		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>  		amdgpu_read_unlock(peer->ddev);
>  		args->n_success = i+1;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index c1bea1f7627b..a4920bc5cfbc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>  			qpd->vmid,
>  			qpd->page_table_base);
>  	/* invalidate the VM context after pasid and vmid mapping is set up */
> -	kfd_flush_tlb(qpd_to_pdd(qpd));
> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>  
>  	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
>  		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>  		if (flush_texture_cache_nocpsch(q->device, qpd))
>  			pr_err("Failed to flush TC\n");
>  
> -	kfd_flush_tlb(qpd_to_pdd(qpd));
> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>  
>  	/* Release the vmid mapping */
>  	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>  				dqm->dev->kgd,
>  				qpd->vmid,
>  				qpd->page_table_base);
> -		kfd_flush_tlb(pdd);
> +		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
>  	}
>  
>  	/* Take a safe reference to the mm_struct, which may otherwise
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index ecdd5e782b81..edce3ecf207d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>  
>  void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
>  
> -void kfd_flush_tlb(struct kfd_process_device *pdd);
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
>  
>  int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 4ab9da288f90..a03373743a3d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -2161,7 +2161,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>  			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
>  }
>  
> -void kfd_flush_tlb(struct kfd_process_device *pdd)
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
>  {
>  	struct kfd_dev *dev = pdd->dev;
>  
> @@ -2174,7 +2174,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
>  							pdd->qpd.vmid);
>  	} else {
>  		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> -					pdd->process->pasid, TLB_FLUSH_LEGACY);
> +					pdd->process->pasid, type);
>  	}
>  }
>  
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: optimize memory mapping latency
  2021-06-01 16:05   ` Felix Kuehling
@ 2021-06-01 16:16     ` Felix Kuehling
  0 siblings, 0 replies; 9+ messages in thread
From: Felix Kuehling @ 2021-06-01 16:16 UTC (permalink / raw)
  To: Eric Huang, amd-gfx


Am 2021-06-01 um 12:05 p.m. schrieb Felix Kuehling:
> Am 2021-05-29 um 6:51 p.m. schrieb Eric Huang:
>> 1. conditionally flush TLBs after map.
>> 2. add heavy weight TLBs flushing after unmap.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 ++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |  1 +
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 24 +++++++++++--------
>>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  6 ++---
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  4 ++--
>>  8 files changed, 25 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 2560977760b3..997258c24ef2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -85,6 +85,7 @@ struct kgd_mem {
>>  
>>  	bool aql_queue;
>>  	bool is_imported;
>> +	bool table_freed;
>>  };
>>  
>>  /* KFD Memory Eviction */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 385c33675227..8ac0d849fd3f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -1132,6 +1132,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
>>  		return ret;
>>  	}
>>  
>> +	mem->table_freed = bo_va->table_freed;
>> +
> I think this should be
>
>     mem->table_freed = mem->table_freed || bo_va->table_freed;
>
> That way, on a multi-GPU system, mem->table_freed gets set to true if
> any GPU freed a page table. Then somewhere, this needs to be reset to false.
>
> However, that means, if one GPU frees a page table, all GPUs need to
> flush, which may be unnecessary. A better alternative would be to do the
> TLB flushing right here, only for the affected GPU, instead of returning
> an aggregated "table_freed" all the way back to
> kfd_ioctl_map_memory_to_gpu, which flushes all GPUs.

On second thought, in this function you don't know the PASID to flush,
and you don't know whether to flush a PASID (with HWS) or a VMID
(without HWS). Also, we don't wait here for the mapping to complete, so
this is not the right place to flush.

So you probably do need to return this all the way to
kfd_ioctl_map_memory_to_gpu after all. And the flush has to be after the
amdgpu_amdkfd_gpuvm_sync_memory. So flushing only the affected GPUs is
getting more difficult. If the flush is rare enough, it doesn't matter.

So I'm OK with your proposed solution of flushing all GPUs in
kfd_ioctl_map_memory_to_gpu. Just make sure you OR together the
table_freed flags from all the GPUs, and reset it to false before you
start mapping. Also, you probably don't need to store this permanently
in the kgd_mem structure. Just add an output parameter to
amdgpu_amdkfd_gpuvm_map_memory_to_gpu.

Regards,
  Felix


>
> Finally, bo_va->table_freed is only used once, after the
> amdgpu_vm_bo_update call returns. So there is no good reason to store
> this permanently in the bo_va structure. It would be better to just add
> an output parameter to amdgpu_vm_bo_update.
>
> Regards,
>   Felix
>
>
>>  	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
>>  }
>>  
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 36e7f088d4ee..0e0f27f779cd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -87,6 +87,7 @@ struct amdgpu_bo_va {
>>  	bool				cleared;
>>  
>>  	bool				is_xgmi;
>> +	bool				table_freed;
>>  };
>>  
>>  struct amdgpu_bo {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 95b94c95adac..ff3eb8395017 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1885,7 +1885,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>  						resv, mapping->start,
>>  						mapping->last, update_flags,
>>  						mapping->offset, mem,
>> -						pages_addr, last_update, NULL,
>> +						pages_addr, last_update, &bo_va->table_freed,
>>  						vram_base_offset);
>>  		if (r)
>>  			return r;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 960913a35ee4..c45ccd1d03c0 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1658,16 +1658,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>>  	}
>>  
>>  	/* Flush TLBs after waiting for the page table updates to complete */
>> -	for (i = 0; i < args->n_devices; i++) {
>> -		peer = kfd_device_by_id(devices_arr[i]);
>> -		if (WARN_ON_ONCE(!peer))
>> -			continue;
>> -		peer_pdd = kfd_get_process_device_data(peer, p);
>> -		if (WARN_ON_ONCE(!peer_pdd))
>> -			continue;
>> -		if (!amdgpu_read_lock(peer->ddev, true)) {
>> -			kfd_flush_tlb(peer_pdd);
>> -			amdgpu_read_unlock(peer->ddev);
>> +	if (((struct kgd_mem *)mem)->table_freed) {
>> +		for (i = 0; i < args->n_devices; i++) {
>> +			peer = kfd_device_by_id(devices_arr[i]);
>> +			if (WARN_ON_ONCE(!peer))
>> +				continue;
>> +			peer_pdd = kfd_get_process_device_data(peer, p);
>> +			if (WARN_ON_ONCE(!peer_pdd))
>> +				continue;
>> +			if (!amdgpu_read_lock(peer->ddev, true)) {
>> +				kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
>> +				amdgpu_read_unlock(peer->ddev);
>> +			}
>>  		}
>>  	}
>>  
>> @@ -1766,6 +1768,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>  			amdgpu_read_unlock(peer->ddev);
>>  			goto unmap_memory_from_gpu_failed;
>>  		}
>> +		((struct kgd_mem *)mem)->table_freed = false;
>> +		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>>  		amdgpu_read_unlock(peer->ddev);
>>  		args->n_success = i+1;
>>  	}
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index c1bea1f7627b..a4920bc5cfbc 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>>  			qpd->vmid,
>>  			qpd->page_table_base);
>>  	/* invalidate the VM context after pasid and vmid mapping is set up */
>> -	kfd_flush_tlb(qpd_to_pdd(qpd));
>> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>  
>>  	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
>>  		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
>> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>>  		if (flush_texture_cache_nocpsch(q->device, qpd))
>>  			pr_err("Failed to flush TC\n");
>>  
>> -	kfd_flush_tlb(qpd_to_pdd(qpd));
>> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>  
>>  	/* Release the vmid mapping */
>>  	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>>  				dqm->dev->kgd,
>>  				qpd->vmid,
>>  				qpd->page_table_base);
>> -		kfd_flush_tlb(pdd);
>> +		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
>>  	}
>>  
>>  	/* Take a safe reference to the mm_struct, which may otherwise
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index ecdd5e782b81..edce3ecf207d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>>  
>>  void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
>>  
>> -void kfd_flush_tlb(struct kfd_process_device *pdd);
>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
>>  
>>  int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
>>  
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 4ab9da288f90..a03373743a3d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -2161,7 +2161,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>>  			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
>>  }
>>  
>> -void kfd_flush_tlb(struct kfd_process_device *pdd)
>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
>>  {
>>  	struct kfd_dev *dev = pdd->dev;
>>  
>> @@ -2174,7 +2174,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
>>  							pdd->qpd.vmid);
>>  	} else {
>>  		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>> -					pdd->process->pasid, TLB_FLUSH_LEGACY);
>> +					pdd->process->pasid, type);
>>  	}
>>  }
>>  
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-06-01 16:16 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-29 22:51 [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed Eric Huang
2021-05-29 22:51 ` [PATCH 2/2] drm/amdkfd: optimize memory mapping latency Eric Huang
2021-06-01 16:05   ` Felix Kuehling
2021-06-01 16:16     ` Felix Kuehling
2021-05-30 16:54 ` [PATCH 1/2] drm/amdgpu: Fix a bug on flag table_freed Christian König
2021-05-30 18:29   ` Eric Huang
2021-05-31 14:08     ` Christian König
2021-05-31 14:30       ` Eric Huang
2021-05-31 19:37         ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.