All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb
@ 2021-06-01 22:59 Eric Huang
  2021-06-01 22:59 ` [PATCH 2/4] drm/amdkfd: Add heavy-weight TLB flush after unmapping Eric Huang
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Eric Huang @ 2021-06-01 22:59 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

It is to provide more tlb flush types opotion for different
case scenario.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c              | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..4da8aff3df27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1666,7 +1666,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 		if (WARN_ON_ONCE(!peer_pdd))
 			continue;
 		if (!amdgpu_read_lock(peer->ddev, true)) {
-			kfd_flush_tlb(peer_pdd);
+			kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
 			amdgpu_read_unlock(peer->ddev);
 		}
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2bd621eee4e0..904b8178c1d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 			qpd->vmid,
 			qpd->page_table_base);
 	/* invalidate the VM context after pasid and vmid mapping is set up */
-	kfd_flush_tlb(qpd_to_pdd(qpd));
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 
 	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
 		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
@@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 		if (flush_texture_cache_nocpsch(q->device, qpd))
 			pr_err("Failed to flush TC\n");
 
-	kfd_flush_tlb(qpd_to_pdd(qpd));
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 				dqm->dev->kgd,
 				qpd->vmid,
 				qpd->page_table_base);
-		kfd_flush_tlb(pdd);
+		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 	}
 
 	/* Take a safe reference to the mm_struct, which may otherwise
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ecdd5e782b81..edce3ecf207d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd);
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3995002c582b..72741f6579d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -2159,7 +2159,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd)
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
 {
 	struct kfd_dev *dev = pdd->dev;
 
@@ -2172,7 +2172,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
 							pdd->qpd.vmid);
 	} else {
 		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
-					pdd->process->pasid, TLB_FLUSH_LEGACY);
+					pdd->process->pasid, type);
 	}
 }
 
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/4] drm/amdkfd: Add heavy-weight TLB flush after unmapping
  2021-06-01 22:59 [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Eric Huang
@ 2021-06-01 22:59 ` Eric Huang
  2021-06-01 22:59 ` [PATCH 3/4] drm/amdgpu: Add flush_tlb parameter to amdgpu_vm_bo_update Eric Huang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Eric Huang @ 2021-06-01 22:59 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

It is a part of memory mapping optimization.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 4da8aff3df27..98f1d2b586c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1766,6 +1766,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 			amdgpu_read_unlock(peer->ddev);
 			goto unmap_memory_from_gpu_failed;
 		}
+		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 		amdgpu_read_unlock(peer->ddev);
 		args->n_success = i+1;
 	}
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/4] drm/amdgpu: Add flush_tlb parameter to amdgpu_vm_bo_update
  2021-06-01 22:59 [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Eric Huang
  2021-06-01 22:59 ` [PATCH 2/4] drm/amdkfd: Add heavy-weight TLB flush after unmapping Eric Huang
@ 2021-06-01 22:59 ` Eric Huang
  2021-06-02  2:54   ` Felix Kuehling
  2021-06-01 22:59 ` [PATCH 4/4] drm/amdkfd: Make TLB flush conditional on mapping Eric Huang
  2021-06-02  6:53 ` [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Christian König
  3 siblings, 1 reply; 10+ messages in thread
From: Eric Huang @ 2021-06-01 22:59 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

It is to pass the flag to KFD, and optimize table_freed in
amdgpu_vm_bo_update_mapping.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 +++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e9f9f462a652..e3df132e53a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -916,7 +916,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
 	if (r)
 		return r;
 
@@ -927,7 +927,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
 		bo_va = fpriv->csa_va;
 		BUG_ON(!bo_va);
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 
@@ -946,7 +946,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (bo_va == NULL)
 			continue;
 
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 2120a87a949f..eac2fd0048cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -696,7 +696,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 
 	if (operation == AMDGPU_VA_OP_MAP ||
 	    operation == AMDGPU_VA_OP_REPLACE) {
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			goto error;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2c20bba7dc1a..fed3d44b5ded 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1729,7 +1729,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
 	if (table_freed)
-		*table_freed = params.table_freed;
+		*table_freed = *table_freed || params.table_freed;
 
 error_unlock:
 	amdgpu_vm_eviction_unlock(vm);
@@ -1793,7 +1793,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
  * 0 for success, -EINVAL for failure.
  */
 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
-			bool clear)
+			bool clear, bool *flush_tlb)
 {
 	struct amdgpu_bo *bo = bo_va->base.bo;
 	struct amdgpu_vm *vm = bo_va->base.vm;
@@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 						resv, mapping->start,
 						mapping->last, update_flags,
 						mapping->offset, mem,
-						pages_addr, last_update, NULL,
+						pages_addr, last_update, flush_tlb,
 						vram_base_offset);
 		if (r)
 			return r;
@@ -2141,7 +2141,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 
 	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
 		/* Per VM BOs never need to bo cleared in the page tables */
-		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
 		if (r)
 			return r;
 	}
@@ -2160,7 +2160,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 		else
 			clear = true;
 
-		r = amdgpu_vm_bo_update(adev, bo_va, clear);
+		r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 67bba8462e7d..24a63e284a69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -419,7 +419,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
-			bool clear);
+			bool clear, bool *flush_tlb);
 bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			     struct amdgpu_bo *bo, bool evicted);
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] drm/amdkfd: Make TLB flush conditional on mapping
  2021-06-01 22:59 [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Eric Huang
  2021-06-01 22:59 ` [PATCH 2/4] drm/amdkfd: Add heavy-weight TLB flush after unmapping Eric Huang
  2021-06-01 22:59 ` [PATCH 3/4] drm/amdgpu: Add flush_tlb parameter to amdgpu_vm_bo_update Eric Huang
@ 2021-06-01 22:59 ` Eric Huang
  2021-06-02  6:53 ` [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Christian König
  3 siblings, 0 replies; 10+ messages in thread
From: Eric Huang @ 2021-06-01 22:59 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

It is to optimize memory mapping latency, and to aviod
a page fault in a corner case of changing valid PDE into
PTE.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 19 ++++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 25 +++++++++++--------
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  3 ++-
 4 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2560977760b3..8f2d6711e12f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -280,7 +280,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
 		uint64_t *size);
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
+		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *flush_tlb);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1fcfa172911a..585b50b6009f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1117,7 +1117,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
 
 static int update_gpuvm_pte(struct kgd_mem *mem,
 			    struct kfd_mem_attachment *entry,
-			    struct amdgpu_sync *sync)
+			    struct amdgpu_sync *sync,
+			    bool *flush_tlb)
 {
 	struct amdgpu_bo_va *bo_va = entry->bo_va;
 	struct amdgpu_device *adev = entry->adev;
@@ -1128,7 +1129,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 		return ret;
 
 	/* Update the page tables  */
-	ret = amdgpu_vm_bo_update(adev, bo_va, false);
+	ret = amdgpu_vm_bo_update(adev, bo_va, false, flush_tlb);
 	if (ret) {
 		pr_err("amdgpu_vm_bo_update failed\n");
 		return ret;
@@ -1140,7 +1141,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 static int map_bo_to_gpuvm(struct kgd_mem *mem,
 			   struct kfd_mem_attachment *entry,
 			   struct amdgpu_sync *sync,
-			   bool no_update_pte)
+			   bool no_update_pte,
+			   bool *flush_tlb)
 {
 	int ret;
 
@@ -1157,7 +1159,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
 	if (no_update_pte)
 		return 0;
 
-	ret = update_gpuvm_pte(mem, entry, sync);
+	ret = update_gpuvm_pte(mem, entry, sync, flush_tlb);
 	if (ret) {
 		pr_err("update_gpuvm_pte() failed\n");
 		goto update_gpuvm_pte_failed;
@@ -1687,7 +1689,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-		struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
+		struct kgd_dev *kgd, struct kgd_mem *mem,
+		void *drm_priv, bool *flush_tlb)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1775,7 +1778,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 			 entry->va, entry->va + bo_size, entry);
 
 		ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
-				      is_invalid_userptr);
+				      is_invalid_userptr, flush_tlb);
 		if (ret) {
 			pr_err("Failed to map bo to gpuvm\n");
 			goto out_unreserve;
@@ -2469,7 +2472,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 				continue;
 
 			kfd_mem_dmaunmap_attachment(mem, attachment);
-			ret = update_gpuvm_pte(mem, attachment, &sync);
+			ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
 			if (ret) {
 				pr_err("%s: update PTE failed\n", __func__);
 				/* make sure this gets validated again */
@@ -2675,7 +2678,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 				continue;
 
 			kfd_mem_dmaunmap_attachment(mem, attachment);
-			ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+			ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
 			if (ret) {
 				pr_debug("Memory eviction: update PTE failed. Try again\n");
 				goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 98f1d2b586c5..441d9ad955d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1574,6 +1574,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 	long err = 0;
 	int i;
 	uint32_t *devices_arr = NULL;
+	bool *flush_tlb = false;
 
 	trace_kfd_map_memory_to_gpu_start(p);
 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
@@ -1637,7 +1638,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 			goto map_memory_to_gpu_failed;
 
 		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+			peer->kgd, (struct kgd_mem *)mem,
+			peer_pdd->drm_priv, flush_tlb);
 		if (err) {
 			pr_err("Failed to map to gpu %d/%d\n",
 			       i, args->n_devices);
@@ -1658,19 +1660,20 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 	}
 
 	/* Flush TLBs after waiting for the page table updates to complete */
-	for (i = 0; i < args->n_devices; i++) {
-		peer = kfd_device_by_id(devices_arr[i]);
-		if (WARN_ON_ONCE(!peer))
-			continue;
-		peer_pdd = kfd_get_process_device_data(peer, p);
-		if (WARN_ON_ONCE(!peer_pdd))
+	if (flush_tlb) {
+		for (i = 0; i < args->n_devices; i++) {
+			peer = kfd_device_by_id(devices_arr[i]);
+			if (WARN_ON_ONCE(!peer))
 			continue;
-		if (!amdgpu_read_lock(peer->ddev, true)) {
-			kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
-			amdgpu_read_unlock(peer->ddev);
+			peer_pdd = kfd_get_process_device_data(peer, p);
+			if (WARN_ON_ONCE(!peer_pdd))
+				continue;
+			if (!amdgpu_read_lock(peer->ddev, true)) {
+				kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+				amdgpu_read_unlock(peer->ddev);
+			}
 		}
 	}
-
 	kfree(devices_arr);
 
 	trace_kfd_map_memory_to_gpu_end(p,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 72741f6579d3..9708214116dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -689,7 +689,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
 	if (err)
 		goto err_alloc_mem;
 
-	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
+	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
+			pdd->drm_priv, NULL);
 	if (err)
 		goto err_map_mem;
 
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Add flush_tlb parameter to amdgpu_vm_bo_update
  2021-06-01 22:59 ` [PATCH 3/4] drm/amdgpu: Add flush_tlb parameter to amdgpu_vm_bo_update Eric Huang
@ 2021-06-02  2:54   ` Felix Kuehling
  2021-06-02  7:02     ` Christian König
  0 siblings, 1 reply; 10+ messages in thread
From: Felix Kuehling @ 2021-06-02  2:54 UTC (permalink / raw)
  To: Eric Huang, amd-gfx list

Am 2021-06-01 um 6:59 p.m. schrieb Eric Huang:
> It is to pass the flag to KFD, and optimize table_freed in
> amdgpu_vm_bo_update_mapping.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  6 +++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 +++++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
>  4 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index e9f9f462a652..e3df132e53a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -916,7 +916,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>  	if (r)
>  		return r;
>  
> -	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
> +	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
>  	if (r)
>  		return r;
>  
> @@ -927,7 +927,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>  	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
>  		bo_va = fpriv->csa_va;
>  		BUG_ON(!bo_va);
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			return r;
>  
> @@ -946,7 +946,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>  		if (bo_va == NULL)
>  			continue;
>  
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			return r;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 2120a87a949f..eac2fd0048cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -696,7 +696,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>  
>  	if (operation == AMDGPU_VA_OP_MAP ||
>  	    operation == AMDGPU_VA_OP_REPLACE) {
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			goto error;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 2c20bba7dc1a..fed3d44b5ded 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1729,7 +1729,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  	r = vm->update_funcs->commit(&params, fence);
>  
>  	if (table_freed)
> -		*table_freed = params.table_freed;
> +		*table_freed = *table_freed || params.table_freed;
>  
>  error_unlock:
>  	amdgpu_vm_eviction_unlock(vm);
> @@ -1793,7 +1793,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
>   * 0 for success, -EINVAL for failure.
>   */
>  int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> -			bool clear)
> +			bool clear, bool *flush_tlb)

To be consistent with amdgpu_vm_bo_update_mapping I'd name this
parameter table_freed.


>  {
>  	struct amdgpu_bo *bo = bo_va->base.bo;
>  	struct amdgpu_vm *vm = bo_va->base.vm;
> @@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>  						resv, mapping->start,
>  						mapping->last, update_flags,
>  						mapping->offset, mem,
> -						pages_addr, last_update, NULL,
> +						pages_addr, last_update, flush_tlb,
>  						vram_base_offset);
>  		if (r)
>  			return r;
> @@ -2141,7 +2141,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>  
>  	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
>  		/* Per VM BOs never need to bo cleared in the page tables */
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>  		if (r)
>  			return r;
>  	}
> @@ -2160,7 +2160,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>  		else
>  			clear = true;
>  
> -		r = amdgpu_vm_bo_update(adev, bo_va, clear);
> +		r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
>  		if (r)
>  			return r;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 67bba8462e7d..24a63e284a69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -419,7 +419,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>  
>  int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>  			struct amdgpu_bo_va *bo_va,
> -			bool clear);
> +			bool clear, bool *flush_tlb);

Same as above. With that fixed, the patch and the series is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

Please also give Christian a chance to review this patch in particular
before you submit.

Thanks,
  Felix


>  bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>  			     struct amdgpu_bo *bo, bool evicted);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb
  2021-06-01 22:59 [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Eric Huang
                   ` (2 preceding siblings ...)
  2021-06-01 22:59 ` [PATCH 4/4] drm/amdkfd: Make TLB flush conditional on mapping Eric Huang
@ 2021-06-02  6:53 ` Christian König
  2021-06-02 13:29   ` philip yang
  2021-06-02 14:53   ` Felix Kuehling
  3 siblings, 2 replies; 10+ messages in thread
From: Christian König @ 2021-06-02  6:53 UTC (permalink / raw)
  To: Eric Huang, amd-gfx, Kuehling, Felix, Yang, Philip

Mostly a question for Felix and Philip:

I've been thinking for a bit about how that case happens in the first place?

I mean if we have a PDE which points to PTEs and then switch that into a 
2MiB PTE then why wasn't that range invalidated before?

In other words when the PDE points to the PTEs we should have had an 
unmap operation on that range before which should have invalidated the TLB.

Regards,
Christian.

Am 02.06.21 um 00:59 schrieb Eric Huang:
> It is to provide more tlb flush types opotion for different
> case scenario.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c              | 2 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++---
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 2 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 4 ++--
>   4 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 960913a35ee4..4da8aff3df27 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1666,7 +1666,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>   		if (WARN_ON_ONCE(!peer_pdd))
>   			continue;
>   		if (!amdgpu_read_lock(peer->ddev, true)) {
> -			kfd_flush_tlb(peer_pdd);
> +			kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
>   			amdgpu_read_unlock(peer->ddev);
>   		}
>   	}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 2bd621eee4e0..904b8178c1d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>   			qpd->vmid,
>   			qpd->page_table_base);
>   	/* invalidate the VM context after pasid and vmid mapping is set up */
> -	kfd_flush_tlb(qpd_to_pdd(qpd));
> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>   
>   	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
>   		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>   		if (flush_texture_cache_nocpsch(q->device, qpd))
>   			pr_err("Failed to flush TC\n");
>   
> -	kfd_flush_tlb(qpd_to_pdd(qpd));
> +	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>   
>   	/* Release the vmid mapping */
>   	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>   				dqm->dev->kgd,
>   				qpd->vmid,
>   				qpd->page_table_base);
> -		kfd_flush_tlb(pdd);
> +		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
>   	}
>   
>   	/* Take a safe reference to the mm_struct, which may otherwise
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index ecdd5e782b81..edce3ecf207d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>   
>   void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
>   
> -void kfd_flush_tlb(struct kfd_process_device *pdd);
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
>   
>   int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 3995002c582b..72741f6579d3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -2159,7 +2159,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>   			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
>   }
>   
> -void kfd_flush_tlb(struct kfd_process_device *pdd)
> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
>   {
>   	struct kfd_dev *dev = pdd->dev;
>   
> @@ -2172,7 +2172,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
>   							pdd->qpd.vmid);
>   	} else {
>   		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
> -					pdd->process->pasid, TLB_FLUSH_LEGACY);
> +					pdd->process->pasid, type);
>   	}
>   }
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Add flush_tlb parameter to amdgpu_vm_bo_update
  2021-06-02  2:54   ` Felix Kuehling
@ 2021-06-02  7:02     ` Christian König
  0 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2021-06-02  7:02 UTC (permalink / raw)
  To: Felix Kuehling, Eric Huang, amd-gfx list

Am 02.06.21 um 04:54 schrieb Felix Kuehling:
> Am 2021-06-01 um 6:59 p.m. schrieb Eric Huang:
>> It is to pass the flag to KFD, and optimize table_freed in
>> amdgpu_vm_bo_update_mapping.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  6 +++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 10 +++++-----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  2 +-
>>   4 files changed, 10 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index e9f9f462a652..e3df132e53a5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -916,7 +916,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>>   	if (r)
>>   		return r;
>>   
>> -	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
>> +	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
>>   	if (r)
>>   		return r;
>>   
>> @@ -927,7 +927,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>>   	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
>>   		bo_va = fpriv->csa_va;
>>   		BUG_ON(!bo_va);
>> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
>> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>>   		if (r)
>>   			return r;
>>   
>> @@ -946,7 +946,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>>   		if (bo_va == NULL)
>>   			continue;
>>   
>> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
>> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>>   		if (r)
>>   			return r;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 2120a87a949f..eac2fd0048cc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -696,7 +696,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>   
>>   	if (operation == AMDGPU_VA_OP_MAP ||
>>   	    operation == AMDGPU_VA_OP_REPLACE) {
>> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
>> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>>   		if (r)
>>   			goto error;
>>   	}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 2c20bba7dc1a..fed3d44b5ded 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1729,7 +1729,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   	r = vm->update_funcs->commit(&params, fence);
>>   
>>   	if (table_freed)
>> -		*table_freed = params.table_freed;
>> +		*table_freed = *table_freed || params.table_freed;
>>   
>>   error_unlock:
>>   	amdgpu_vm_eviction_unlock(vm);
>> @@ -1793,7 +1793,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
>>    * 0 for success, -EINVAL for failure.
>>    */
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>> -			bool clear)
>> +			bool clear, bool *flush_tlb)
> To be consistent with amdgpu_vm_bo_update_mapping I'd name this
> parameter table_freed.
>
>
>>   {
>>   	struct amdgpu_bo *bo = bo_va->base.bo;
>>   	struct amdgpu_vm *vm = bo_va->base.vm;
>> @@ -1887,7 +1887,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>   						resv, mapping->start,
>>   						mapping->last, update_flags,
>>   						mapping->offset, mem,
>> -						pages_addr, last_update, NULL,
>> +						pages_addr, last_update, flush_tlb,
>>   						vram_base_offset);
>>   		if (r)
>>   			return r;
>> @@ -2141,7 +2141,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>   
>>   	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
>>   		/* Per VM BOs never need to bo cleared in the page tables */
>> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
>> +		r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
>>   		if (r)
>>   			return r;
>>   	}
>> @@ -2160,7 +2160,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>   		else
>>   			clear = true;
>>   
>> -		r = amdgpu_vm_bo_update(adev, bo_va, clear);
>> +		r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
>>   		if (r)
>>   			return r;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 67bba8462e7d..24a63e284a69 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -419,7 +419,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>   
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   			struct amdgpu_bo_va *bo_va,
>> -			bool clear);
>> +			bool clear, bool *flush_tlb);
> Same as above. With that fixed, the patch and the series is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> Please also give Christian a chance to review this patch in particular
> before you submit.

With the naming made consistent the patch is Reviewed-by: Christian 
König <christian.koenig@amd.com> as well.

>
> Thanks,
>    Felix
>
>
>>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>   			     struct amdgpu_bo *bo, bool evicted);
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb
  2021-06-02  6:53 ` [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Christian König
@ 2021-06-02 13:29   ` philip yang
  2021-06-02 13:31     ` Christian König
  2021-06-02 14:53   ` Felix Kuehling
  1 sibling, 1 reply; 10+ messages in thread
From: philip yang @ 2021-06-02 13:29 UTC (permalink / raw)
  To: Christian König, Eric Huang, amd-gfx, Kuehling, Felix, Yang, Philip

[-- Attachment #1: Type: text/html, Size: 10318 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb
  2021-06-02 13:29   ` philip yang
@ 2021-06-02 13:31     ` Christian König
  0 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2021-06-02 13:31 UTC (permalink / raw)
  To: philip yang, Eric Huang, amd-gfx, Kuehling, Felix, Yang, Philip


[-- Attachment #1.1: Type: text/plain, Size: 5889 bytes --]



Am 02.06.21 um 15:29 schrieb philip yang:
>
>
> On 2021-06-02 2:53 a.m., Christian König wrote:
>> Mostly a question for Felix and Philip:
>>
>> I've been thinking for a bit about how that case happens in the first 
>> place?
>>
>> I mean if we have a PDE which points to PTEs and then switch that 
>> into a 2MiB PTE then why wasn't that range invalidated before?
>>
>> In other words when the PDE points to the PTEs we should have had an 
>> unmap operation on that range before which should have invalidated 
>> the TLB.
>
> Because one cache line has 8 PDE0
>

Ah, of course! Yeah that makes totally sense now.

Christian.

> , after unmap flush tlb, access address on same PDE0 cache line will 
> load PDE0 back into tlb. For example:
>
> 1. map and access 0x7ffff6210000, unmap it, tlb flush
>
> 2. map and access 0x7ffff6400000, PDE0 for 0x7ffff6200000 into tlb, 
> which is P=0, V=1
>
> 3. map 0x7ffff6200000 update page table, access has vm fault because 
> tlb has PDE0 P=0,V=1, to recover this fault, map need update page 
> table and flush tlb.
>
> Regards,
>
> Philip
>
>>
>> Regards,
>> Christian.
>>
>> Am 02.06.21 um 00:59 schrieb Eric Huang:
>>> It is to provide more tlb flush types opotion for different
>>> case scenario.
>>>
>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c              | 2 +-
>>>   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 2 +-
>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 4 ++--
>>>   4 files changed, 7 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index 960913a35ee4..4da8aff3df27 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1666,7 +1666,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct 
>>> file *filep,
>>>           if (WARN_ON_ONCE(!peer_pdd))
>>>               continue;
>>>           if (!amdgpu_read_lock(peer->ddev, true)) {
>>> -            kfd_flush_tlb(peer_pdd);
>>> +            kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
>>>               amdgpu_read_unlock(peer->ddev);
>>>           }
>>>       }
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> index 2bd621eee4e0..904b8178c1d7 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> @@ -278,7 +278,7 @@ static int allocate_vmid(struct 
>>> device_queue_manager *dqm,
>>>               qpd->vmid,
>>>               qpd->page_table_base);
>>>       /* invalidate the VM context after pasid and vmid mapping is 
>>> set up */
>>> -    kfd_flush_tlb(qpd_to_pdd(qpd));
>>> +    kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>>         if (dqm->dev->kfd2kgd->set_scratch_backing_va)
>>> dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
>>> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct 
>>> device_queue_manager *dqm,
>>>           if (flush_texture_cache_nocpsch(q->device, qpd))
>>>               pr_err("Failed to flush TC\n");
>>>   -    kfd_flush_tlb(qpd_to_pdd(qpd));
>>> +    kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>>         /* Release the vmid mapping */
>>>       set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>>> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct 
>>> device_queue_manager *dqm,
>>>                   dqm->dev->kgd,
>>>                   qpd->vmid,
>>>                   qpd->page_table_base);
>>> -        kfd_flush_tlb(pdd);
>>> +        kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
>>>       }
>>>         /* Take a safe reference to the mm_struct, which may otherwise
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index ecdd5e782b81..edce3ecf207d 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>>>     void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 
>>> pasid);
>>>   -void kfd_flush_tlb(struct kfd_process_device *pdd);
>>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum 
>>> TLB_FLUSH_TYPE type);
>>>     int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct 
>>> kfd_process *p);
>>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index 3995002c582b..72741f6579d3 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -2159,7 +2159,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, 
>>> struct kfd_process *process,
>>>                      KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
>>>   }
>>>   -void kfd_flush_tlb(struct kfd_process_device *pdd)
>>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum 
>>> TLB_FLUSH_TYPE type)
>>>   {
>>>       struct kfd_dev *dev = pdd->dev;
>>>   @@ -2172,7 +2172,7 @@ void kfd_flush_tlb(struct kfd_process_device 
>>> *pdd)
>>>                               pdd->qpd.vmid);
>>>       } else {
>>>           amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>>> -                    pdd->process->pasid, TLB_FLUSH_LEGACY);
>>> +                    pdd->process->pasid, type);
>>>       }
>>>   }
>>


[-- Attachment #1.2: Type: text/html, Size: 8963 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb
  2021-06-02  6:53 ` [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Christian König
  2021-06-02 13:29   ` philip yang
@ 2021-06-02 14:53   ` Felix Kuehling
  1 sibling, 0 replies; 10+ messages in thread
From: Felix Kuehling @ 2021-06-02 14:53 UTC (permalink / raw)
  To: Christian König, Eric Huang, amd-gfx, Yang, Philip

Am 2021-06-02 um 2:53 a.m. schrieb Christian König:
> Mostly a question for Felix and Philip:
>
> I've been thinking for a bit about how that case happens in the first
> place?
>
> I mean if we have a PDE which points to PTEs and then switch that into
> a 2MiB PTE then why wasn't that range invalidated before?
>
> In other words when the PDE points to the PTEs we should have had an
> unmap operation on that range before which should have invalidated the
> TLB.

The unmap operation doesn't change the PDE, it only changes the PTEs one
level down in the page table. For example, imagine you unmap a 4KB BO.
It's the last 4KB mapping within this 2MB block. But the unmapping only
updates the PTE for that 4KB page. It does not consolidate the entire
2MB block into an invalid PDE because it doesn't look outside the small
address range that it's unmapping.

Now you map a new 2MB BO at that virtual address. That's when the PTB
gets freed and the PDE gets turned into a PTE with the P bit set.

Regards,
  Felix


>
> Regards,
> Christian.
>
> Am 02.06.21 um 00:59 schrieb Eric Huang:
>> It is to provide more tlb flush types opotion for different
>> case scenario.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c              | 2 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++---
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 2 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c              | 4 ++--
>>   4 files changed, 7 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 960913a35ee4..4da8aff3df27 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1666,7 +1666,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct
>> file *filep,
>>           if (WARN_ON_ONCE(!peer_pdd))
>>               continue;
>>           if (!amdgpu_read_lock(peer->ddev, true)) {
>> -            kfd_flush_tlb(peer_pdd);
>> +            kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
>>               amdgpu_read_unlock(peer->ddev);
>>           }
>>       }
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 2bd621eee4e0..904b8178c1d7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -278,7 +278,7 @@ static int allocate_vmid(struct
>> device_queue_manager *dqm,
>>               qpd->vmid,
>>               qpd->page_table_base);
>>       /* invalidate the VM context after pasid and vmid mapping is
>> set up */
>> -    kfd_flush_tlb(qpd_to_pdd(qpd));
>> +    kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>         if (dqm->dev->kfd2kgd->set_scratch_backing_va)
>>           dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
>> @@ -314,7 +314,7 @@ static void deallocate_vmid(struct
>> device_queue_manager *dqm,
>>           if (flush_texture_cache_nocpsch(q->device, qpd))
>>               pr_err("Failed to flush TC\n");
>>   -    kfd_flush_tlb(qpd_to_pdd(qpd));
>> +    kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
>>         /* Release the vmid mapping */
>>       set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>> @@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct
>> device_queue_manager *dqm,
>>                   dqm->dev->kgd,
>>                   qpd->vmid,
>>                   qpd->page_table_base);
>> -        kfd_flush_tlb(pdd);
>> +        kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
>>       }
>>         /* Take a safe reference to the mm_struct, which may otherwise
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index ecdd5e782b81..edce3ecf207d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
>>     void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32
>> pasid);
>>   -void kfd_flush_tlb(struct kfd_process_device *pdd);
>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum
>> TLB_FLUSH_TYPE type);
>>     int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct
>> kfd_process *p);
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 3995002c582b..72741f6579d3 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -2159,7 +2159,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev,
>> struct kfd_process *process,
>>                      KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
>>   }
>>   -void kfd_flush_tlb(struct kfd_process_device *pdd)
>> +void kfd_flush_tlb(struct kfd_process_device *pdd, enum
>> TLB_FLUSH_TYPE type)
>>   {
>>       struct kfd_dev *dev = pdd->dev;
>>   @@ -2172,7 +2172,7 @@ void kfd_flush_tlb(struct kfd_process_device
>> *pdd)
>>                               pdd->qpd.vmid);
>>       } else {
>>           amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
>> -                    pdd->process->pasid, TLB_FLUSH_LEGACY);
>> +                    pdd->process->pasid, type);
>>       }
>>   }
>>   
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-06-02 14:53 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-01 22:59 [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Eric Huang
2021-06-01 22:59 ` [PATCH 2/4] drm/amdkfd: Add heavy-weight TLB flush after unmapping Eric Huang
2021-06-01 22:59 ` [PATCH 3/4] drm/amdgpu: Add flush_tlb parameter to amdgpu_vm_bo_update Eric Huang
2021-06-02  2:54   ` Felix Kuehling
2021-06-02  7:02     ` Christian König
2021-06-01 22:59 ` [PATCH 4/4] drm/amdkfd: Make TLB flush conditional on mapping Eric Huang
2021-06-02  6:53 ` [PATCH 1/4] drm/amdkfd: Add flush-type parameter to kfd_flush_tlb Christian König
2021-06-02 13:29   ` philip yang
2021-06-02 13:31     ` Christian König
2021-06-02 14:53   ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.