* [PATCH v2] drm/amdkfd: optimize memory mapping latency
@ 2021-06-01 18:50 Eric Huang
2021-06-01 19:09 ` Felix Kuehling
0 siblings, 1 reply; 2+ messages in thread
From: Eric Huang @ 2021-06-01 18:50 UTC (permalink / raw)
To: amd-gfx; +Cc: Eric Huang
1. Flush TLBs after map only when the page table update reports that page tables were freed.
2. Add a heavyweight TLB flush after unmap.
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +-
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 21 ++++++++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 27 +++++++++++--------
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 ++---
8 files changed, 41 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2560977760b3..8f2d6711e12f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -280,7 +280,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *flush_tlb);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1fcfa172911a..14c8e23c68b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1117,7 +1117,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
static int update_gpuvm_pte(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
- struct amdgpu_sync *sync)
+ struct amdgpu_sync *sync,
+ bool *flush_tlb)
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_device *adev = entry->adev;
@@ -1127,6 +1128,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
if (ret)
return ret;
+ bo_va->table_freed = false;
/* Update the page tables */
ret = amdgpu_vm_bo_update(adev, bo_va, false);
if (ret) {
@@ -1134,13 +1136,17 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
+ if (flush_tlb)
+ *flush_tlb = *flush_tlb || bo_va->table_freed;
+
return amdgpu_sync_fence(sync, bo_va->last_pt_update);
}
static int map_bo_to_gpuvm(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync,
- bool no_update_pte)
+ bool no_update_pte,
+ bool *flush_tlb)
{
int ret;
@@ -1157,7 +1163,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
if (no_update_pte)
return 0;
- ret = update_gpuvm_pte(mem, entry, sync);
+ ret = update_gpuvm_pte(mem, entry, sync, flush_tlb);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
goto update_gpuvm_pte_failed;
@@ -1687,7 +1693,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
+ struct kgd_dev *kgd, struct kgd_mem *mem,
+ void *drm_priv, bool *flush_tlb)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1775,7 +1782,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size, entry);
ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
- is_invalid_userptr);
+ is_invalid_userptr, flush_tlb);
if (ret) {
pr_err("Failed to map bo to gpuvm\n");
goto out_unreserve;
@@ -2469,7 +2476,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
- ret = update_gpuvm_pte(mem, attachment, &sync);
+ ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
if (ret) {
pr_err("%s: update PTE failed\n", __func__);
/* make sure this gets validated again */
@@ -2675,7 +2682,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
- ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+ ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
if (ret) {
pr_debug("Memory eviction: update PTE failed. Try again\n");
goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index ba1cf66e9172..d987acfa80a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -87,6 +87,7 @@ struct amdgpu_bo_va {
bool cleared;
bool is_xgmi;
+ bool table_freed;
};
struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2c20bba7dc1a..03bfa1eba196 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
resv, mapping->start,
mapping->last, update_flags,
mapping->offset, mem,
- pages_addr, last_update, NULL,
+ pages_addr, last_update, &bo_va->table_freed,
vram_base_offset);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..136f77cadc2f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1574,6 +1574,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
long err = 0;
int i;
uint32_t *devices_arr = NULL;
+ bool flush_tlb = false;
trace_kfd_map_memory_to_gpu_start(p);
dev = kfd_device_by_id(GET_GPU_ID(args->handle));
@@ -1637,7 +1638,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
goto map_memory_to_gpu_failed;
err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+ peer->kgd, (struct kgd_mem *)mem,
+ peer_pdd->drm_priv, &flush_tlb);
if (err) {
pr_err("Failed to map to gpu %d/%d\n",
i, args->n_devices);
@@ -1658,16 +1660,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
/* Flush TLBs after waiting for the page table updates to complete */
- for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
- if (WARN_ON_ONCE(!peer_pdd))
- continue;
- if (!amdgpu_read_lock(peer->ddev, true)) {
- kfd_flush_tlb(peer_pdd);
- amdgpu_read_unlock(peer->ddev);
+ if (flush_tlb) {
+ for (i = 0; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ if (!amdgpu_read_lock(peer->ddev, true)) {
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+ amdgpu_read_unlock(peer->ddev);
+ }
}
}
@@ -1766,6 +1770,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
amdgpu_read_unlock(peer->ddev);
goto unmap_memory_from_gpu_failed;
}
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
amdgpu_read_unlock(peer->ddev);
args->n_success = i+1;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2bd621eee4e0..904b8178c1d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
- kfd_flush_tlb(qpd_to_pdd(qpd));
+ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
@@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
if (flush_texture_cache_nocpsch(q->device, qpd))
pr_err("Failed to flush TC\n");
- kfd_flush_tlb(qpd_to_pdd(qpd));
+ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
dqm->dev->kgd,
qpd->vmid,
qpd->page_table_base);
- kfd_flush_tlb(pdd);
+ kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
}
/* Take a safe reference to the mm_struct, which may otherwise
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ecdd5e782b81..edce3ecf207d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
-void kfd_flush_tlb(struct kfd_process_device *pdd);
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3995002c582b..9708214116dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -689,7 +689,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
if (err)
goto err_alloc_mem;
- err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
+ err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
+ pdd->drm_priv, NULL);
if (err)
goto err_map_mem;
@@ -2159,7 +2160,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
-void kfd_flush_tlb(struct kfd_process_device *pdd)
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
{
struct kfd_dev *dev = pdd->dev;
@@ -2172,7 +2173,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
pdd->qpd.vmid);
} else {
amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
- pdd->process->pasid, TLB_FLUSH_LEGACY);
+ pdd->process->pasid, type);
}
}
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v2] drm/amdkfd: optimize memory mapping latency
2021-06-01 18:50 [PATCH v2] drm/amdkfd: optimize memory mapping latency Eric Huang
@ 2021-06-01 19:09 ` Felix Kuehling
0 siblings, 0 replies; 2+ messages in thread
From: Felix Kuehling @ 2021-06-01 19:09 UTC (permalink / raw)
To: Eric Huang, amd-gfx
Am 2021-06-01 um 2:50 p.m. schrieb Eric Huang:
> 1. conditionally flush TLBs after map.
> 2. add heavy weight TLBs flushing after unmap.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 21 ++++++++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 27 +++++++++++--------
> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++---
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 ++---
> 8 files changed, 41 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 2560977760b3..8f2d6711e12f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -280,7 +280,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
> uint64_t *size);
> int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
> + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *flush_tlb);
> int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
> struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
> int amdgpu_amdkfd_gpuvm_sync_memory(
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 1fcfa172911a..14c8e23c68b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1117,7 +1117,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
>
> static int update_gpuvm_pte(struct kgd_mem *mem,
> struct kfd_mem_attachment *entry,
> - struct amdgpu_sync *sync)
> + struct amdgpu_sync *sync,
> + bool *flush_tlb)
> {
> struct amdgpu_bo_va *bo_va = entry->bo_va;
> struct amdgpu_device *adev = entry->adev;
> @@ -1127,6 +1128,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
> if (ret)
> return ret;
>
> + bo_va->table_freed = false;
> /* Update the page tables */
> ret = amdgpu_vm_bo_update(adev, bo_va, false);
> if (ret) {
> @@ -1134,13 +1136,17 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
> return ret;
> }
>
> + if (flush_tlb)
> + *flush_tlb = *flush_tlb || bo_va->table_freed;
> +
> return amdgpu_sync_fence(sync, bo_va->last_pt_update);
> }
>
> static int map_bo_to_gpuvm(struct kgd_mem *mem,
> struct kfd_mem_attachment *entry,
> struct amdgpu_sync *sync,
> - bool no_update_pte)
> + bool no_update_pte,
> + bool *flush_tlb)
> {
> int ret;
>
> @@ -1157,7 +1163,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
> if (no_update_pte)
> return 0;
>
> - ret = update_gpuvm_pte(mem, entry, sync);
> + ret = update_gpuvm_pte(mem, entry, sync, flush_tlb);
> if (ret) {
> pr_err("update_gpuvm_pte() failed\n");
> goto update_gpuvm_pte_failed;
> @@ -1687,7 +1693,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> }
>
> int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
> + struct kgd_dev *kgd, struct kgd_mem *mem,
> + void *drm_priv, bool *flush_tlb)
> {
> struct amdgpu_device *adev = get_amdgpu_device(kgd);
> struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> @@ -1775,7 +1782,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> entry->va, entry->va + bo_size, entry);
>
> ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
> - is_invalid_userptr);
> + is_invalid_userptr, flush_tlb);
> if (ret) {
> pr_err("Failed to map bo to gpuvm\n");
> goto out_unreserve;
> @@ -2469,7 +2476,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
> continue;
>
> kfd_mem_dmaunmap_attachment(mem, attachment);
> - ret = update_gpuvm_pte(mem, attachment, &sync);
> + ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
> if (ret) {
> pr_err("%s: update PTE failed\n", __func__);
> /* make sure this gets validated again */
> @@ -2675,7 +2682,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
> continue;
>
> kfd_mem_dmaunmap_attachment(mem, attachment);
> - ret = update_gpuvm_pte(mem, attachment, &sync_obj);
> + ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
> if (ret) {
> pr_debug("Memory eviction: update PTE failed. Try again\n");
> goto validate_map_fail;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index ba1cf66e9172..d987acfa80a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -87,6 +87,7 @@ struct amdgpu_bo_va {
> bool cleared;
>
> bool is_xgmi;
> + bool table_freed;
I still don't see the need to add this flag to the bo_va. If we did need
to save this persistently, it would make more sense in struct amdgpu_vm.
But I still think it should just be an output parameter of
amdgpu_vm_bo_update.
Other than that, this patch looks good to me now.
Regards,
Felix
> };
>
> struct amdgpu_bo {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 2c20bba7dc1a..03bfa1eba196 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> resv, mapping->start,
> mapping->last, update_flags,
> mapping->offset, mem,
> - pages_addr, last_update, NULL,
> + pages_addr, last_update, &bo_va->table_freed,
> vram_base_offset);
> if (r)
> return r;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 960913a35ee4..136f77cadc2f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1574,6 +1574,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
> long err = 0;
> int i;
> uint32_t *devices_arr = NULL;
> + bool flush_tlb = false;
>
> trace_kfd_map_memory_to_gpu_start(p);
> dev = kfd_device_by_id(GET_GPU_ID(args->handle));
> @@ -1637,7 +1638,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
> goto map_memory_to_gpu_failed;
>
> err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> - peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
> + peer->kgd, (struct kgd_mem *)mem,
> + peer_pdd->drm_priv, &flush_tlb);
> if (err) {
> pr_err("Failed to map to gpu %d/%d\n",
> i, args->n_devices);
> @@ -1658,16 +1660,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
> }
>
> /* Flush TLBs after waiting for the page table updates to complete */
> - for (i = 0; i < args->n_devices; i++) {
> - peer = kfd_device_by_id(devices_arr[i]);
> - if (WARN_ON_ONCE(!peer))
> - continue;
> - peer_pdd = kfd_get_process_device_data(peer, p);
> - if (WARN_ON_ONCE(!peer_pdd))
> - continue;
> - if (!amdgpu_read_lock(peer->ddev, true)) {
> - kfd_flush_tlb(peer_pdd);
> - amdgpu_read_unlock(peer->ddev);
> + if (flush_tlb) {
> + for (i = 0; i < args->n_devices; i++) {
> + peer = kfd_device_by_id(devices_arr[i]);
> + if (WARN_ON_ONCE(!peer))
> + continue;
> + peer_pdd = kfd_get_process_device_data(peer, p);
> + if (WARN_ON_ONCE(!peer_pdd))
> + continue;
> + if (!amdgpu_read_lock(peer->ddev, true)) {
> + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
> + amdgpu_read_unlock(peer->ddev);
> + }
> }
> }
>
> @@ -1766,6 +1770,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
> amdgpu_read_unlock(peer->ddev);
> goto unmap_memory_from_gpu_failed;
> }
> + kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
> amdgpu_read_unlock(peer->ddev);
> args->n_success = i+1;
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 2bd621eee4e0..904b8178c1d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
> qpd->vmid,
> qpd->page_table_base);
> /* invalidate the VM context after pasid and vmid mapping is set up */
> - kfd_flush_tlb(qpd_to_pdd(qpd));
> + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>
> if (dqm->dev->kfd2kgd->set_scratch_backing_va)
> dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
> if (flush_texture_cache_nocpsch(q->device, qpd))
> pr_err("Failed to flush TC\n");
>
> - kfd_flush_tlb(qpd_to_pdd(qpd));
> + kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>
> /* Release the vmid mapping */
> set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
> dqm->dev->kgd,
> qpd->vmid,
> qpd->page_table_base);
> - kfd_flush_tlb(pdd);
> + kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
> }
>
> /* Take a safe reference to the mm_struct, which may otherwise
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index ecdd5e782b81..edce3ecf207d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>
> void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
>
> -void kfd_flush_tlb(struct kfd_process_device *pdd);
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
>
> int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 3995002c582b..9708214116dc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -689,7 +689,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
> if (err)
> goto err_alloc_mem;
>
> - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
> + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
> + pdd->drm_priv, NULL);
> if (err)
> goto err_map_mem;
>
> @@ -2159,7 +2160,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
> KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
> }
>
> -void kfd_flush_tlb(struct kfd_process_device *pdd)
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
> {
> struct kfd_dev *dev = pdd->dev;
>
> @@ -2172,7 +2173,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
> pdd->qpd.vmid);
> } else {
> amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> - pdd->process->pasid, TLB_FLUSH_LEGACY);
> + pdd->process->pasid, type);
> }
> }
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-06-01 19:09 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-01 18:50 [PATCH v2] drm/amdkfd: optimize memory mapping latency Eric Huang
2021-06-01 19:09 ` Felix Kuehling
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.