All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/amdkfd: optimize memory mapping latency
@ 2021-06-01 21:30 Eric Huang
  2021-06-01 21:56 ` Felix Kuehling
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Huang @ 2021-06-01 21:30 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

1. conditionally flush TLBs after map.
2. add heavy weight TLBs flush after unmap.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 19 +++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  6 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 10 +++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 27 +++++++++++--------
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  6 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  7 ++---
 10 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2560977760b3..8f2d6711e12f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -280,7 +280,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
 		uint64_t *size);
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *flush_tlb);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1fcfa172911a..585b50b6009f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1117,7 +1117,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
 
 static int update_gpuvm_pte(struct kgd_mem *mem,
 			    struct kfd_mem_attachment *entry,
-			    struct amdgpu_sync *sync)
+			    struct amdgpu_sync *sync,
+			    bool *flush_tlb)
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
 	struct amdgpu_device *adev = entry->adev;
@@ -1128,7 +1129,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 		return ret;
 
 	/* Update the page tables  */
-	ret = amdgpu_vm_bo_update(adev, bo_va, false);
+	ret = amdgpu_vm_bo_update(adev, bo_va, false, flush_tlb);
 	if (ret) {
 		pr_err("amdgpu_vm_bo_update failed\n");
 		return ret;
@@ -1140,7 +1141,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 static int map_bo_to_gpuvm(struct kgd_mem *mem,
 			   struct kfd_mem_attachment *entry,
 			   struct amdgpu_sync *sync,
-			   bool no_update_pte)
+			   bool no_update_pte,
+			   bool *flush_tlb)
 {
 	int ret;
 
@@ -1157,7 +1159,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
 	if (no_update_pte)
 		return 0;
 
-	ret = update_gpuvm_pte(mem, entry, sync);
+	ret = update_gpuvm_pte(mem, entry, sync, flush_tlb);
 	if (ret) {
 		pr_err("update_gpuvm_pte() failed\n");
 		goto update_gpuvm_pte_failed;
@@ -1687,7 +1689,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
+		struct kgd_dev *kgd, struct kgd_mem *mem,
+		void *drm_priv, bool *flush_tlb)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1775,7 +1778,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 			 entry->va, entry->va + bo_size, entry);
 
 		ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
-				      is_invalid_userptr);
+				      is_invalid_userptr, flush_tlb);
 		if (ret) {
 			pr_err("Failed to map bo to gpuvm\n");
 			goto out_unreserve;
@@ -2469,7 +2472,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 				continue;
 
 			kfd_mem_dmaunmap_attachment(mem, attachment);
-			ret = update_gpuvm_pte(mem, attachment, &sync);
+			ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
 			if (ret) {
 				pr_err("%s: update PTE failed\n", __func__);
 				/* make sure this gets validated again */
@@ -2675,7 +2678,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 				continue;
 
 			kfd_mem_dmaunmap_attachment(mem, attachment);
-			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+			ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
 			if (ret) {
 				pr_debug("Memory eviction: update PTE failed. Try again\n");
 				goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e9f9f462a652..e3df132e53a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -916,7 +916,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
 	if (r)
 		return r;
 
@@ -927,7 +927,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
 		bo_va = fpriv->csa_va;
 		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 
@@ -946,7 +946,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (bo_va == NULL)
 			continue;
 
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 2120a87a949f..eac2fd0048cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -696,7 +696,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 
 	if (operation == AMDGPU_VA_OP_MAP ||
 	    operation == AMDGPU_VA_OP_REPLACE) {
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			goto error;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2c20bba7dc1a..fed3d44b5ded 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1729,7 +1729,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
 	if (table_freed)
-		*table_freed = params.table_freed;
+		*table_freed = *table_freed || params.table_freed;
 
 error_unlock:
 	amdgpu_vm_eviction_unlock(vm);
@@ -1793,7 +1793,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
  * 0 for success, -EINVAL for failure.
  */
 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
-			bool clear)
+			bool clear, bool *flush_tlb)
 {
 	struct amdgpu_bo *bo = bo_va->base.bo;
 	struct amdgpu_vm *vm = bo_va->base.vm;
@@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 						resv, mapping->start,
 						mapping->last, update_flags,
 						mapping->offset, mem,
-						pages_addr, last_update, NULL,
+						pages_addr, last_update, flush_tlb,
 						vram_base_offset);
 		if (r)
 			return r;
@@ -2141,7 +2141,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 
 	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
 		/* Per VM BOs never need to bo cleared in the page tables */
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 	}
@@ -2160,7 +2160,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 		else
 			clear = true;
 
-		r = amdgpu_vm_bo_update(adev, bo_va, clear);
+		r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 67bba8462e7d..24a63e284a69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -419,7 +419,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
-			bool clear);
+			bool clear, bool *flush_tlb);
 bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			     struct amdgpu_bo *bo, bool evicted);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..136f77cadc2f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1574,6 +1574,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 	long err = 0;
 	int i;
 	uint32_t *devices_arr = NULL;
+	bool flush_tlb = false;
 
 	trace_kfd_map_memory_to_gpu_start(p);
 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
@@ -1637,7 +1638,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 			goto map_memory_to_gpu_failed;
 
 		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+			peer->kgd, (struct kgd_mem *)mem,
+			peer_pdd->drm_priv, &flush_tlb);
 		if (err) {
 			pr_err("Failed to map to gpu %d/%d\n",
 			       i, args->n_devices);
@@ -1658,16 +1660,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 	}
 
 	/* Flush TLBs after waiting for the page table updates to complete */
-	for (i = 0; i < args->n_devices; i++) {
-		peer = kfd_device_by_id(devices_arr[i]);
-		if (WARN_ON_ONCE(!peer))
-			continue;
-		peer_pdd = kfd_get_process_device_data(peer, p);
-		if (WARN_ON_ONCE(!peer_pdd))
-			continue;
-		if (!amdgpu_read_lock(peer->ddev, true)) {
-			kfd_flush_tlb(peer_pdd);
-			amdgpu_read_unlock(peer->ddev);
+	if (flush_tlb) {
+		for (i = 0; i < args->n_devices; i++) {
+			peer = kfd_device_by_id(devices_arr[i]);
+			if (WARN_ON_ONCE(!peer))
+				continue;
+			peer_pdd = kfd_get_process_device_data(peer, p);
+			if (WARN_ON_ONCE(!peer_pdd))
+				continue;
+			if (!amdgpu_read_lock(peer->ddev, true)) {
+				kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+				amdgpu_read_unlock(peer->ddev);
+			}
 		}
 	}
 
@@ -1766,6 +1770,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 			amdgpu_read_unlock(peer->ddev);
 			goto unmap_memory_from_gpu_failed;
 		}
+		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 		amdgpu_read_unlock(peer->ddev);
 		args->n_success = i+1;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2bd621eee4e0..904b8178c1d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 			qpd->vmid,
 			qpd->page_table_base);
 	/* invalidate the VM context after pasid and vmid mapping is set up */
-	kfd_flush_tlb(qpd_to_pdd(qpd));
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 
 	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
 		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
@@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 		if (flush_texture_cache_nocpsch(q->device, qpd))
 			pr_err("Failed to flush TC\n");
 
-	kfd_flush_tlb(qpd_to_pdd(qpd));
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 				dqm->dev->kgd,
 				qpd->vmid,
 				qpd->page_table_base);
-		kfd_flush_tlb(pdd);
+		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 	}
 
 	/* Take a safe reference to the mm_struct, which may otherwise
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ecdd5e782b81..edce3ecf207d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd);
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3995002c582b..9708214116dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -689,7 +689,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
 	if (err)
 		goto err_alloc_mem;
 
-	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
+	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
+			pdd->drm_priv, NULL);
 	if (err)
 		goto err_map_mem;
 
@@ -2159,7 +2160,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd)
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
 {
 	struct kfd_dev *dev = pdd->dev;
 
@@ -2172,7 +2173,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
 							pdd->qpd.vmid);
 	} else {
 		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
-					pdd->process->pasid, TLB_FLUSH_LEGACY);
+					pdd->process->pasid, type);
 	}
 }
 
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v3] drm/amdkfd: optimize memory mapping latency
  2021-06-01 21:30 [PATCH v3] drm/amdkfd: optimize memory mapping latency Eric Huang
@ 2021-06-01 21:56 ` Felix Kuehling
  0 siblings, 0 replies; 2+ messages in thread
From: Felix Kuehling @ 2021-06-01 21:56 UTC (permalink / raw)
  To: Eric Huang, amd-gfx

Am 2021-06-01 um 5:30 p.m. schrieb Eric Huang:
> 1. conditionally flush TLBs after map.
> 2. add heavy weight TLBs flush after unmap.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>

The code looks good to me. But it's mixing a bit too much in a single
commit, which makes it hard to see what's really changing. We should
break this into a few commits to separate refactoring from actual
functional changes:

 1. Add flush-type parameter to kfd_flush_tlb
 2. Add heavy-weight TLB flush after unmapping
 3. Add table_freed parameter to amdgpu_vm_bo_update mapping
 4. Make TLB-flush after mapping conditional on table_freed parameter

1 and 3 are just refactoring. 2 and 4 change functionality. When
reviewing those four commits, it should be easy to see that 1 and 3
don't change any functionality. And the functional changes in 2 and 4
should be really clear and obvious.

Regards,
  Felix


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 19 +++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  6 ++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 10 +++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 27 +++++++++++--------
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  6 ++---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  7 ++---
>  10 files changed, 46 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 2560977760b3..8f2d6711e12f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -280,7 +280,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>  		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
>  		uint64_t *size);
>  int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> -		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
> +		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *flush_tlb);
>  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>  		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
>  int amdgpu_amdkfd_gpuvm_sync_memory(
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 1fcfa172911a..585b50b6009f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1117,7 +1117,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
>  
>  static int update_gpuvm_pte(struct kgd_mem *mem,
>  			    struct kfd_mem_attachment *entry,
> -			    struct amdgpu_sync *sync)
> +			    struct amdgpu_sync *sync,
> +			    bool *flush_tlb)
>  {
>  	struct amdgpu_bo_va *bo_va = entry->bo_va;
>  	struct amdgpu_device *adev = entry->adev;
> @@ -1128,7 +1129,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
>  		return ret;
>  
>  	/* Update the page tables  */
> -	ret = amdgpu_vm_bo_update(adev, bo_va, false);
> +	ret = amdgpu_vm_bo_update(adev, bo_va, false, flush_tlb);
>  	if (ret) {
>  		pr_err("amdgpu_vm_bo_update failed\n");
>  		return ret;
> @@ -1140,7 +1141,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
>  static int map_bo_to_gpuvm(struct kgd_mem *mem,
>  			   struct kfd_mem_attachment *entry,
>  			   struct amdgpu_sync *sync,
> -			   bool no_update_pte)
> +			   bool no_update_pte,
> +			   bool *flush_tlb)
>  {
>  	int ret;
>  
> @@ -1157,7 +1159,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
>  	if (no_update_pte)
>  		return 0;
>  
> -	ret = update_gpuvm_pte(mem, entry, sync);
> +	ret = update_gpuvm_pte(mem, entry, sync, flush_tlb);
>  	if (ret) {
>  		pr_err("update_gpuvm_pte() failed\n");
>  		goto update_gpuvm_pte_failed;
> @@ -1687,7 +1689,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>  }
>  
>  int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> -		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
> +		struct kgd_dev *kgd, struct kgd_mem *mem,
> +		void *drm_priv, bool *flush_tlb)
>  {
>  	struct amdgpu_device *adev = get_amdgpu_device(kgd);
>  	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> @@ -1775,7 +1778,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>  			 entry->va, entry->va + bo_size, entry);
>  
>  		ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
> -				      is_invalid_userptr);
> +				      is_invalid_userptr, flush_tlb);
>  		if (ret) {
>  			pr_err("Failed to map bo to gpuvm\n");
>  			goto out_unreserve;
> @@ -2469,7 +2472,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>  				continue;
>  
>  			kfd_mem_dmaunmap_attachment(mem, attachment);
> -			ret = update_gpuvm_pte(mem, attachment, &sync);
> +			ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
>  			if (ret) {
>  				pr_err("%s: update PTE failed\n", __func__);
>  				/* make sure this gets validated again */
> @@ -2675,7 +2678,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>  				continue;
>  
>  			kfd_mem_dmaunmap_attachment(mem, attachment);
> -			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
> +			ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
>  			if (ret) {
>  				pr_debug("Memory eviction: update PTE failed. Try again\n");
>  				goto validate_map_fail;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index e9f9f462a652..e3df132e53a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -916,7 +916,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>  	if (r)
>  		return r;
>  
> -	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
> +	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
>  	if (r)
>  		return r;
>  
> @@ -927,7 +927,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>  	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
>  		bo_va = fpriv->csa_va;
>  		BUG_ON(!bo_va);
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			return r;
>  
> @@ -946,7 +946,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>  		if (bo_va == NULL)
>  			continue;
>  
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			return r;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 2120a87a949f..eac2fd0048cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -696,7 +696,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>  
>  	if (operation == AMDGPU_VA_OP_MAP ||
>  	    operation == AMDGPU_VA_OP_REPLACE) {
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			goto error;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 2c20bba7dc1a..fed3d44b5ded 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1729,7 +1729,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  	r = vm->update_funcs->commit(&params, fence);
>  
>  	if (table_freed)
> -		*table_freed = params.table_freed;
> +		*table_freed = *table_freed || params.table_freed;
>  
>  error_unlock:
>  	amdgpu_vm_eviction_unlock(vm);
> @@ -1793,7 +1793,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
>   * 0 for success, -EINVAL for failure.
>   */
>  int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> -			bool clear)
> +			bool clear, bool *flush_tlb)
>  {
>  	struct amdgpu_bo *bo = bo_va->base.bo;
>  	struct amdgpu_vm *vm = bo_va->base.vm;
> @@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>  						resv, mapping->start,
>  						mapping->last, update_flags,
>  						mapping->offset, mem,
> -						pages_addr, last_update, NULL,
> +						pages_addr, last_update, flush_tlb,
>  						vram_base_offset);
>  		if (r)
>  			return r;
> @@ -2141,7 +2141,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>  
>  	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
>  		/* Per VM BOs never need to bo cleared in the page tables */
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			return r;
>  	}
> @@ -2160,7 +2160,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>  		else
>  			clear = true;
>  
> -		r = amdgpu_vm_bo_update(adev, bo_va, clear);
> +		r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
>  		if (r)
>  			return r;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 67bba8462e7d..24a63e284a69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -419,7 +419,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  
>  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>  			struct amdgpu_bo_va *bo_va,
> -			bool clear);
> +			bool clear, bool *flush_tlb);
>  bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>  			     struct amdgpu_bo *bo, bool evicted);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 960913a35ee4..136f77cadc2f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1574,6 +1574,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  	long err = 0;
>  	int i;
>  	uint32_t *devices_arr = NULL;
> +	bool flush_tlb = false;
>  
>  	trace_kfd_map_memory_to_gpu_start(p);
>  	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
> @@ -1637,7 +1638,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  			goto map_memory_to_gpu_failed;
>  
>  		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
> -			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
> +			peer->kgd, (struct kgd_mem *)mem,
> +			peer_pdd->drm_priv, &flush_tlb);
>  		if (err) {
>  			pr_err("Failed to map to gpu %d/%d\n",
>  			       i, args->n_devices);
> @@ -1658,16 +1660,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  	}
>  
>  	/* Flush TLBs after waiting for the page table updates to complete */
> -	for (i = 0; i < args->n_devices; i++) {
> -		peer = kfd_device_by_id(devices_arr[i]);
> -		if (WARN_ON_ONCE(!peer))
> -			continue;
> -		peer_pdd = kfd_get_process_device_data(peer, p);
> -		if (WARN_ON_ONCE(!peer_pdd))
> -			continue;
> -		if (!amdgpu_read_lock(peer->ddev, true)) {
> -			kfd_flush_tlb(peer_pdd);
> -			amdgpu_read_unlock(peer->ddev);
> +	if (flush_tlb) {
> +		for (i = 0; i < args->n_devices; i++) {
> +			peer = kfd_device_by_id(devices_arr[i]);
> +			if (WARN_ON_ONCE(!peer))
> +				continue;
> +			peer_pdd = kfd_get_process_device_data(peer, p);
> +			if (WARN_ON_ONCE(!peer_pdd))
> +				continue;
> +			if (!amdgpu_read_lock(peer->ddev, true)) {
> +				kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
> +				amdgpu_read_unlock(peer->ddev);
> +			}
>  		}
>  	}
>  
> @@ -1766,6 +1770,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  			amdgpu_read_unlock(peer->ddev);
>  			goto unmap_memory_from_gpu_failed;
>  		}
> +		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>  		amdgpu_read_unlock(peer->ddev);
>  		args->n_success = i+1;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 2bd621eee4e0..904b8178c1d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>  			qpd->vmid,
>  			qpd->page_table_base);
>  	/* invalidate the VM context after pasid and vmid mapping is set up */
> -	kfd_flush_tlb(qpd_to_pdd(qpd));
> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>  
>  	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
>  		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>  		if (flush_texture_cache_nocpsch(q->device, qpd))
>  			pr_err("Failed to flush TC\n");
>  
> -	kfd_flush_tlb(qpd_to_pdd(qpd));
> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>  
>  	/* Release the vmid mapping */
>  	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>  				dqm->dev->kgd,
>  				qpd->vmid,
>  				qpd->page_table_base);
> -		kfd_flush_tlb(pdd);
> +		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
>  	}
>  
>  	/* Take a safe reference to the mm_struct, which may otherwise
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index ecdd5e782b81..edce3ecf207d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>  
>  void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
>  
> -void kfd_flush_tlb(struct kfd_process_device *pdd);
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
>  
>  int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 3995002c582b..9708214116dc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -689,7 +689,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
>  	if (err)
>  		goto err_alloc_mem;
>  
> -	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
> +	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
> +			pdd->drm_priv, NULL);
>  	if (err)
>  		goto err_map_mem;
>  
> @@ -2159,7 +2160,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>  			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
>  }
>  
> -void kfd_flush_tlb(struct kfd_process_device *pdd)
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
>  {
>  	struct kfd_dev *dev = pdd->dev;
>  
> @@ -2172,7 +2173,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
>  							pdd->qpd.vmid);
>  	} else {
>  		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> -					pdd->process->pasid, TLB_FLUSH_LEGACY);
> +					pdd->process->pasid, type);
>  	}
>  }
>  
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-06-01 21:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-01 21:30 [PATCH v3] drm/amdkfd: optimize memory mapping latency Eric Huang
2021-06-01 21:56 ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.