All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: Separate pinned BOs destruction from general routine
@ 2021-10-11  8:58 Lang Yu
  2021-10-13 15:24 ` Felix Kuehling
  0 siblings, 1 reply; 4+ messages in thread
From: Lang Yu @ 2021-10-11  8:58 UTC (permalink / raw)
  To: amd-gfx
  Cc: Christian Koenig, Felix Kuehling, Alex Deucher, Huang Rui, Lang Yu

Currently, all kfd BOs use same destruction routine. But pinned
BOs are not unpinned properly. Separate them from general routine.

Signed-off-by: Lang Yu <lang.yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   2 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 ++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 125 ++++++++++++++----
 5 files changed, 114 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 69de31754907..751557af09bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -279,6 +279,8 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
 		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 		struct kgd_mem *mem, void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem);
+
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 					    struct dma_fence **ef);
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 054c1a224def..6acc78b02bdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1871,6 +1871,16 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 	return ret;
 }
 
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem)
+{
+	struct amdgpu_bo *bo = mem->bo;
+
+	amdgpu_bo_reserve(bo, true);
+	amdgpu_bo_kunmap(bo);
+	amdgpu_bo_unpin(bo);
+	amdgpu_bo_unreserve(bo);
+}
+
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 					      struct kfd_vm_fault_info *mem)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f1e7edeb4e6b..0db48ac10fde 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1051,6 +1051,9 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 			pr_err("Failed to set event page\n");
 			return err;
 		}
+
+		p->signal_handle = args->event_page_offset;
+
 	}
 
 	err = kfd_event_create(filp, p, args->event_type,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6d8f9bb2d905..30f08f1606bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -608,12 +608,14 @@ struct qcm_process_device {
 	uint32_t sh_hidden_private_base;
 
 	/* CWSR memory */
+	struct kgd_mem *cwsr_mem;
 	void *cwsr_kaddr;
 	uint64_t cwsr_base;
 	uint64_t tba_addr;
 	uint64_t tma_addr;
 
 	/* IB memory */
+	struct kgd_mem *ib_mem;
 	uint64_t ib_base;
 	void *ib_kaddr;
 
@@ -808,6 +810,7 @@ struct kfd_process {
 	/* Event ID allocator and lookup */
 	struct idr event_idr;
 	/* Event page */
+	u64 signal_handle;
 	struct kfd_signal_page *signal_page;
 	size_t signal_mapped_size;
 	size_t signal_event_count;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 21ec8a18cad2..c024f2e2efaa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
 static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);
 
+static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
+
 struct kfd_procfs_tree {
 	struct kobject *kobj;
 };
@@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void)
 }
 
 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
-			struct kfd_process_device *pdd)
+			struct kfd_process_device *pdd, void *kptr)
 {
 	struct kfd_dev *dev = pdd->dev;
 
+	if (kptr) {
+		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
+		kptr = NULL;
+	}
+
 	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
 					       NULL);
@@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
  */
 static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
 				   uint64_t gpu_va, uint32_t size,
-				   uint32_t flags, void **kptr)
+				   uint32_t flags, struct kgd_mem **mem, void **kptr)
 {
 	struct kfd_dev *kdev = pdd->dev;
-	struct kgd_mem *mem = NULL;
-	int handle;
 	int err;
 
 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
-						 pdd->drm_priv, &mem, NULL, flags);
+						 pdd->drm_priv, mem, NULL, flags);
 	if (err)
 		goto err_alloc_mem;
 
-	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
+	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
 			pdd->drm_priv, NULL);
 	if (err)
 		goto err_map_mem;
 
-	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
+	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
 	if (err) {
 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
 		goto sync_memory_failed;
 	}
 
-	/* Create an obj handle so kfd_process_device_remove_obj_handle
-	 * will take care of the bo removal when the process finishes.
-	 * We do not need to take p->mutex, because the process is just
-	 * created and the ioctls have not had the chance to run.
-	 */
-	handle = kfd_process_device_create_obj_handle(pdd, mem);
-
-	if (handle < 0) {
-		err = handle;
-		goto free_gpuvm;
-	}
-
 	if (kptr) {
 		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
-				(struct kgd_mem *)mem, kptr, NULL);
+				(struct kgd_mem *)*mem, kptr, NULL);
 		if (err) {
 			pr_debug("Map GTT BO to kernel failed\n");
-			goto free_obj_handle;
+			goto sync_memory_failed;
 		}
 	}
 
 	return err;
 
-free_obj_handle:
-	kfd_process_device_remove_obj_handle(pdd, handle);
-free_gpuvm:
 sync_memory_failed:
-	kfd_process_free_gpuvm(mem, pdd);
-	return err;
+	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv);
 
 err_map_mem:
-	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
 					       NULL);
 err_alloc_mem:
+	*mem = NULL;
 	*kptr = NULL;
 	return err;
 }
@@ -776,6 +766,7 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
 			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
 			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
 			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+	struct kgd_mem *mem;
 	void *kaddr;
 	int ret;
 
@@ -784,15 +775,26 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
 
 	/* ib_base is only set for dGPU */
 	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
-				      &kaddr);
+				      &mem, &kaddr);
 	if (ret)
 		return ret;
 
+	qpd->ib_mem = mem;
 	qpd->ib_kaddr = kaddr;
 
 	return 0;
 }
 
+static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
+{
+	struct qcm_process_device *qpd = &pdd->qpd;
+
+	if (!qpd->ib_kaddr || !qpd->ib_base)
+		return;
+
+	kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
+}
+
 struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
@@ -947,6 +949,52 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
 	}
 }
 
+static void kfd_process_free_signal_bo(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+	struct kfd_dev *kdev;
+	void *mem;
+	int i;
+
+	kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
+	if (!kdev)
+		return;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(kdev, p);
+	if (!pdd) {
+		mutex_unlock(&p->mutex);
+		return;
+	}
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(p->signal_handle));
+	if (!mem) {
+		mutex_unlock(&p->mutex);
+		return;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *peer_pdd = p->pdds[i];
+
+		if (!peer_pdd->drm_priv)
+			continue;
+		amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+				peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
+	}
+
+	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);
+
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem,
+		pdd->drm_priv, NULL);
+
+	kfd_process_device_remove_obj_handle(pdd,
+		GET_IDR_HANDLE(p->signal_handle));
+}
+
 static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
 {
 	int i;
@@ -965,6 +1013,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
 				pdd->dev->id, p->pasid);
 
+		kfd_process_device_destroy_cwsr_dgpu(pdd);
+		kfd_process_device_destroy_ib_mem(pdd);
+
 		if (pdd->drm_file) {
 			amdgpu_amdkfd_gpuvm_release_process_vm(
 					pdd->dev->kgd, pdd->drm_priv);
@@ -1049,9 +1100,11 @@ static void kfd_process_wq_release(struct work_struct *work)
 {
 	struct kfd_process *p = container_of(work, struct kfd_process,
 					     release_work);
+
 	kfd_process_remove_sysfs(p);
 	kfd_iommu_unbind_process(p);
 
+	kfd_process_free_signal_bo(p);
 	kfd_process_free_outstanding_kfd_bos(p);
 	svm_range_list_fini(p);
 
@@ -1066,6 +1119,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 	put_task_struct(p->lead_thread);
 
 	kfree(p);
+
 }
 
 static void kfd_process_ref_release(struct kref *ref)
@@ -1198,6 +1252,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
 	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
 			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
 			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+	struct kgd_mem *mem;
 	void *kaddr;
 	int ret;
 
@@ -1206,10 +1261,11 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
 
 	/* cwsr_base is only set for dGPU */
 	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
-				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
+				      KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr);
 	if (ret)
 		return ret;
 
+	qpd->cwsr_mem = mem;
 	qpd->cwsr_kaddr = kaddr;
 	qpd->tba_addr = qpd->cwsr_base;
 
@@ -1222,6 +1278,17 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
 	return 0;
 }
 
+static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+	struct qcm_process_device *qpd = &pdd->qpd;
+
+	if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
+		return;
+
+	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
+}
+
 void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
 				  uint64_t tba_addr,
 				  uint64_t tma_addr)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdkfd: Separate pinned BOs destruction from general routine
  2021-10-11  8:58 [PATCH] drm/amdkfd: Separate pinned BOs destruction from general routine Lang Yu
@ 2021-10-13 15:24 ` Felix Kuehling
  2021-10-14 10:14   ` Yu, Lang
  0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2021-10-13 15:24 UTC (permalink / raw)
  To: Lang Yu, amd-gfx; +Cc: Christian Koenig, Alex Deucher, Huang Rui

Am 2021-10-11 um 4:58 a.m. schrieb Lang Yu:
> Currently, all kfd BOs use same destruction routine. But pinned
> BOs are not unpinned properly. Separate them from general routine.
>
> Signed-off-by: Lang Yu <lang.yu@amd.com>

I think the general idea is right. However, we need another safeguard
for the signal BO, which is allocated by user mode and can be freed by
user mode at any time. We can solve this in one of two ways:

 1. Add special handling for the signal BO in
    kfd_ioctl_free_memory_of_gpu to kunmap the BO and make sure the
    signal handling code is aware of it
 2. Fail kfd_ioctl_free_memory_of_gpu for signal BOs and only allow them
    to be destroyed at process termination

I think #2 is easier, and is consistent with what current user mode does.

A few more comment inline ...


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   2 +
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |   3 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 125 ++++++++++++++----
>  5 files changed, 114 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 69de31754907..751557af09bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -279,6 +279,8 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>  		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
>  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
>  		struct kgd_mem *mem, void **kptr, uint64_t *size);
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem);
> +
>  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>  					    struct dma_fence **ef);
>  int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 054c1a224def..6acc78b02bdc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1871,6 +1871,16 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
>  	return ret;
>  }
>  
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem)
> +{
> +	struct amdgpu_bo *bo = mem->bo;
> +
> +	amdgpu_bo_reserve(bo, true);
> +	amdgpu_bo_kunmap(bo);
> +	amdgpu_bo_unpin(bo);
> +	amdgpu_bo_unreserve(bo);
> +}
> +
>  int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
>  					      struct kfd_vm_fault_info *mem)
>  {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f1e7edeb4e6b..0db48ac10fde 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1051,6 +1051,9 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
>  			pr_err("Failed to set event page\n");

Need to kunmap the signal BO here.


>  			return err;
>  		}
> +
> +		p->signal_handle = args->event_page_offset;
> +
>  	}
>  
>  	err = kfd_event_create(filp, p, args->event_type,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 6d8f9bb2d905..30f08f1606bb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -608,12 +608,14 @@ struct qcm_process_device {
>  	uint32_t sh_hidden_private_base;
>  
>  	/* CWSR memory */
> +	struct kgd_mem *cwsr_mem;
>  	void *cwsr_kaddr;
>  	uint64_t cwsr_base;
>  	uint64_t tba_addr;
>  	uint64_t tma_addr;
>  
>  	/* IB memory */
> +	struct kgd_mem *ib_mem;
>  	uint64_t ib_base;
>  	void *ib_kaddr;
>  
> @@ -808,6 +810,7 @@ struct kfd_process {
>  	/* Event ID allocator and lookup */
>  	struct idr event_idr;
>  	/* Event page */
> +	u64 signal_handle;
>  	struct kfd_signal_page *signal_page;
>  	size_t signal_mapped_size;
>  	size_t signal_event_count;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 21ec8a18cad2..c024f2e2efaa 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
>  static void evict_process_worker(struct work_struct *work);
>  static void restore_process_worker(struct work_struct *work);
>  
> +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
> +
>  struct kfd_procfs_tree {
>  	struct kobject *kobj;
>  };
> @@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void)
>  }
>  
>  static void kfd_process_free_gpuvm(struct kgd_mem *mem,
> -			struct kfd_process_device *pdd)
> +			struct kfd_process_device *pdd, void *kptr)
>  {
>  	struct kfd_dev *dev = pdd->dev;
>  
> +	if (kptr) {
> +		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
> +		kptr = NULL;
> +	}
> +
>  	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
>  	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
>  					       NULL);
> @@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
>   */
>  static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
>  				   uint64_t gpu_va, uint32_t size,
> -				   uint32_t flags, void **kptr)
> +				   uint32_t flags, struct kgd_mem **mem, void **kptr)
>  {
>  	struct kfd_dev *kdev = pdd->dev;
> -	struct kgd_mem *mem = NULL;
> -	int handle;
>  	int err;
>  
>  	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
> -						 pdd->drm_priv, &mem, NULL, flags);
> +						 pdd->drm_priv, mem, NULL, flags);
>  	if (err)
>  		goto err_alloc_mem;
>  
> -	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
> +	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
>  			pdd->drm_priv, NULL);
>  	if (err)
>  		goto err_map_mem;
>  
> -	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
> +	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
>  	if (err) {
>  		pr_debug("Sync memory failed, wait interrupted by user signal\n");
>  		goto sync_memory_failed;
>  	}
>  
> -	/* Create an obj handle so kfd_process_device_remove_obj_handle
> -	 * will take care of the bo removal when the process finishes.
> -	 * We do not need to take p->mutex, because the process is just
> -	 * created and the ioctls have not had the chance to run.
> -	 */
> -	handle = kfd_process_device_create_obj_handle(pdd, mem);
> -
> -	if (handle < 0) {
> -		err = handle;
> -		goto free_gpuvm;
> -	}
> -
>  	if (kptr) {
>  		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
> -				(struct kgd_mem *)mem, kptr, NULL);
> +				(struct kgd_mem *)*mem, kptr, NULL);
>  		if (err) {
>  			pr_debug("Map GTT BO to kernel failed\n");
> -			goto free_obj_handle;
> +			goto sync_memory_failed;
>  		}
>  	}
>  
>  	return err;
>  
> -free_obj_handle:
> -	kfd_process_device_remove_obj_handle(pdd, handle);
> -free_gpuvm:
>  sync_memory_failed:
> -	kfd_process_free_gpuvm(mem, pdd);
> -	return err;
> +	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv);
>  
>  err_map_mem:
> -	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
> +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
>  					       NULL);
>  err_alloc_mem:
> +	*mem = NULL;
>  	*kptr = NULL;
>  	return err;
>  }
> @@ -776,6 +766,7 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
>  			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
>  			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
>  			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> +	struct kgd_mem *mem;
>  	void *kaddr;
>  	int ret;
>  
> @@ -784,15 +775,26 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
>  
>  	/* ib_base is only set for dGPU */
>  	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
> -				      &kaddr);
> +				      &mem, &kaddr);
>  	if (ret)
>  		return ret;
>  
> +	qpd->ib_mem = mem;
>  	qpd->ib_kaddr = kaddr;
>  
>  	return 0;
>  }
>  
> +static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
> +{
> +	struct qcm_process_device *qpd = &pdd->qpd;
> +
> +	if (!qpd->ib_kaddr || !qpd->ib_base)
> +		return;
> +
> +	kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
> +}
> +
>  struct kfd_process *kfd_create_process(struct file *filep)
>  {
>  	struct kfd_process *process;
> @@ -947,6 +949,52 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
>  	}
>  }
>  
> +static void kfd_process_free_signal_bo(struct kfd_process *p)
> +{
> +	struct kfd_process_device *pdd;
> +	struct kfd_dev *kdev;
> +	void *mem;
> +	int i;
> +
> +	kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
> +	if (!kdev)
> +		return;
> +
> +	mutex_lock(&p->mutex);
> +
> +	pdd = kfd_get_process_device_data(kdev, p);
> +	if (!pdd) {
> +		mutex_unlock(&p->mutex);
> +		return;
> +	}
> +
> +	mem = kfd_process_device_translate_handle(
> +		pdd, GET_IDR_HANDLE(p->signal_handle));
> +	if (!mem) {
> +		mutex_unlock(&p->mutex);
> +		return;
> +	}
> +
> +	mutex_unlock(&p->mutex);
> +
> +	for (i = 0; i < p->n_pdds; i++) {
> +		struct kfd_process_device *peer_pdd = p->pdds[i];
> +
> +		if (!peer_pdd->drm_priv)
> +			continue;
> +		amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
> +				peer_pdd->dev->kgd, mem, peer_pdd->drm_priv);
> +	}
> +
> +	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);

I think you only need to do the kunmap here. You can leave
"unmap_memory_from_gpu" and "free_memory_of_gpu" and "remove_obj_handle"
to be done in the regular kfd_process_free_outstanding_kfd_bos to avoid
duplicating that code.


> +
> +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem,
> +		pdd->drm_priv, NULL);
> +
> +	kfd_process_device_remove_obj_handle(pdd,
> +		GET_IDR_HANDLE(p->signal_handle));
> +}
> +
>  static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
>  {
>  	int i;
> @@ -965,6 +1013,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
>  		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
>  				pdd->dev->id, p->pasid);
>  
> +		kfd_process_device_destroy_cwsr_dgpu(pdd);
> +		kfd_process_device_destroy_ib_mem(pdd);
> +
>  		if (pdd->drm_file) {
>  			amdgpu_amdkfd_gpuvm_release_process_vm(
>  					pdd->dev->kgd, pdd->drm_priv);
> @@ -1049,9 +1100,11 @@ static void kfd_process_wq_release(struct work_struct *work)
>  {
>  	struct kfd_process *p = container_of(work, struct kfd_process,
>  					     release_work);
> +
>  	kfd_process_remove_sysfs(p);
>  	kfd_iommu_unbind_process(p);
>  
> +	kfd_process_free_signal_bo(p);
>  	kfd_process_free_outstanding_kfd_bos(p);
>  	svm_range_list_fini(p);
>  
> @@ -1066,6 +1119,7 @@ static void kfd_process_wq_release(struct work_struct *work)
>  	put_task_struct(p->lead_thread);
>  
>  	kfree(p);
> +

Unnecessary, trailing whitespace.

Regards,
  Felix


>  }
>  
>  static void kfd_process_ref_release(struct kref *ref)
> @@ -1198,6 +1252,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>  	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
>  			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
>  			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> +	struct kgd_mem *mem;
>  	void *kaddr;
>  	int ret;
>  
> @@ -1206,10 +1261,11 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>  
>  	/* cwsr_base is only set for dGPU */
>  	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
> -				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
> +				      KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr);
>  	if (ret)
>  		return ret;
>  
> +	qpd->cwsr_mem = mem;
>  	qpd->cwsr_kaddr = kaddr;
>  	qpd->tba_addr = qpd->cwsr_base;
>  
> @@ -1222,6 +1278,17 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>  	return 0;
>  }
>  
> +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
> +{
> +	struct kfd_dev *dev = pdd->dev;
> +	struct qcm_process_device *qpd = &pdd->qpd;
> +
> +	if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
> +		return;
> +
> +	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
> +}
> +
>  void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
>  				  uint64_t tba_addr,
>  				  uint64_t tma_addr)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] drm/amdkfd: Separate pinned BOs destruction from general routine
  2021-10-13 15:24 ` Felix Kuehling
@ 2021-10-14 10:14   ` Yu, Lang
  2021-10-14 17:14     ` Christian König
  0 siblings, 1 reply; 4+ messages in thread
From: Yu, Lang @ 2021-10-14 10:14 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx
  Cc: Koenig, Christian, Deucher, Alexander, Huang, Ray

[AMD Official Use Only]



>-----Original Message-----
>From: Kuehling, Felix <Felix.Kuehling@amd.com>
>Sent: Wednesday, October 13, 2021 11:25 PM
>To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>Cc: Koenig, Christian <Christian.Koenig@amd.com>; Deucher, Alexander
><Alexander.Deucher@amd.com>; Huang, Ray <Ray.Huang@amd.com>
>Subject: Re: [PATCH] drm/amdkfd: Separate pinned BOs destruction from
>general routine
>
>Am 2021-10-11 um 4:58 a.m. schrieb Lang Yu:
>> Currently, all kfd BOs use same destruction routine. But pinned BOs
>> are not unpinned properly. Separate them from general routine.
>>
>> Signed-off-by: Lang Yu <lang.yu@amd.com>
>
>I think the general idea is right. However, we need another safeguard for the
>signal BO, which is allocated by user mode and can be freed by user mode at
>any time. We can solve this in one of two ways:
>
> 1. Add special handling for the signal BO in
>    kfd_ioctl_free_memory_of_gpu to kunmap the BO and make sure the
>    signal handling code is aware of it
> 2. Fail kfd_ioctl_free_memory_of_gpu for signal BOs and only allow them
>    to be destroyed at process termination
>
>I think #2 is easier, and is consistent with what current user mode does.

Will add safeguard to prevent that according to #2.
 
>
>A few more comment inline ...
>
>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   2 +
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 ++
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |   3 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 +
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 125 ++++++++++++++---
>-
>>  5 files changed, 114 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 69de31754907..751557af09bb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -279,6 +279,8 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>>  		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);  int
>> amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
>>  		struct kgd_mem *mem, void **kptr, uint64_t *size);
>> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
>kgd_dev
>> +*kgd, struct kgd_mem *mem);
>> +
>>  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>>  					    struct dma_fence **ef);
>>  int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, diff
>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 054c1a224def..6acc78b02bdc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -1871,6 +1871,16 @@ int
>amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
>>  	return ret;
>>  }
>>
>> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
>kgd_dev
>> +*kgd, struct kgd_mem *mem) {
>> +	struct amdgpu_bo *bo = mem->bo;
>> +
>> +	amdgpu_bo_reserve(bo, true);
>> +	amdgpu_bo_kunmap(bo);
>> +	amdgpu_bo_unpin(bo);
>> +	amdgpu_bo_unreserve(bo);
>> +}
>> +
>>  int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
>>  					      struct kfd_vm_fault_info *mem)
>{ diff --git
>> a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index f1e7edeb4e6b..0db48ac10fde 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1051,6 +1051,9 @@ static int kfd_ioctl_create_event(struct file *filp,
>struct kfd_process *p,
>>  			pr_err("Failed to set event page\n");
>
>Need to kunmap the signal BO here.

Will kunmap it here.

>
>>  			return err;
>>  		}
>> +
>> +		p->signal_handle = args->event_page_offset;
>> +
>>  	}
>>
>>  	err = kfd_event_create(filp, p, args->event_type, diff --git
>> a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 6d8f9bb2d905..30f08f1606bb 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -608,12 +608,14 @@ struct qcm_process_device {
>>  	uint32_t sh_hidden_private_base;
>>
>>  	/* CWSR memory */
>> +	struct kgd_mem *cwsr_mem;
>>  	void *cwsr_kaddr;
>>  	uint64_t cwsr_base;
>>  	uint64_t tba_addr;
>>  	uint64_t tma_addr;
>>
>>  	/* IB memory */
>> +	struct kgd_mem *ib_mem;
>>  	uint64_t ib_base;
>>  	void *ib_kaddr;
>>
>> @@ -808,6 +810,7 @@ struct kfd_process {
>>  	/* Event ID allocator and lookup */
>>  	struct idr event_idr;
>>  	/* Event page */
>> +	u64 signal_handle;
>>  	struct kfd_signal_page *signal_page;
>>  	size_t signal_mapped_size;
>>  	size_t signal_event_count;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 21ec8a18cad2..c024f2e2efaa 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct
>> kfd_process *p, struct file *filep);  static void
>> evict_process_worker(struct work_struct *work);  static void
>> restore_process_worker(struct work_struct *work);
>>
>> +static void kfd_process_device_destroy_cwsr_dgpu(struct
>> +kfd_process_device *pdd);
>> +
>>  struct kfd_procfs_tree {
>>  	struct kobject *kobj;
>>  };
>> @@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void)  }
>>
>>  static void kfd_process_free_gpuvm(struct kgd_mem *mem,
>> -			struct kfd_process_device *pdd)
>> +			struct kfd_process_device *pdd, void *kptr)
>>  {
>>  	struct kfd_dev *dev = pdd->dev;
>>
>> +	if (kptr) {
>> +		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev-
>>kgd, mem);
>> +		kptr = NULL;
>> +	}
>> +
>>  	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd,
>mem, pdd->drm_priv);
>>  	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem,
>pdd->drm_priv,
>>  					       NULL);
>> @@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct
>kgd_mem *mem,
>>   */
>>  static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
>>  				   uint64_t gpu_va, uint32_t size,
>> -				   uint32_t flags, void **kptr)
>> +				   uint32_t flags, struct kgd_mem **mem, void
>**kptr)
>>  {
>>  	struct kfd_dev *kdev = pdd->dev;
>> -	struct kgd_mem *mem = NULL;
>> -	int handle;
>>  	int err;
>>
>>  	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd,
>gpu_va, size,
>> -						 pdd->drm_priv, &mem, NULL,
>flags);
>> +						 pdd->drm_priv, mem, NULL,
>flags);
>>  	if (err)
>>  		goto err_alloc_mem;
>>
>> -	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd,
>mem,
>> +	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd,
>*mem,
>>  			pdd->drm_priv, NULL);
>>  	if (err)
>>  		goto err_map_mem;
>>
>> -	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem,
>true);
>> +	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem,
>true);
>>  	if (err) {
>>  		pr_debug("Sync memory failed, wait interrupted by user
>signal\n");
>>  		goto sync_memory_failed;
>>  	}
>>
>> -	/* Create an obj handle so kfd_process_device_remove_obj_handle
>> -	 * will take care of the bo removal when the process finishes.
>> -	 * We do not need to take p->mutex, because the process is just
>> -	 * created and the ioctls have not had the chance to run.
>> -	 */
>> -	handle = kfd_process_device_create_obj_handle(pdd, mem);
>> -
>> -	if (handle < 0) {
>> -		err = handle;
>> -		goto free_gpuvm;
>> -	}
>> -
>>  	if (kptr) {
>>  		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev-
>>kgd,
>> -				(struct kgd_mem *)mem, kptr, NULL);
>> +				(struct kgd_mem *)*mem, kptr, NULL);
>>  		if (err) {
>>  			pr_debug("Map GTT BO to kernel failed\n");
>> -			goto free_obj_handle;
>> +			goto sync_memory_failed;
>>  		}
>>  	}
>>
>>  	return err;
>>
>> -free_obj_handle:
>> -	kfd_process_device_remove_obj_handle(pdd, handle);
>> -free_gpuvm:
>>  sync_memory_failed:
>> -	kfd_process_free_gpuvm(mem, pdd);
>> -	return err;
>> +	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd,
>*mem,
>> +pdd->drm_priv);
>>
>>  err_map_mem:
>> -	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem,
>pdd->drm_priv,
>> +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem,
>> +pdd->drm_priv,
>>  					       NULL);
>>  err_alloc_mem:
>> +	*mem = NULL;
>>  	*kptr = NULL;
>>  	return err;
>>  }
>> @@ -776,6 +766,7 @@ static int
>kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
>>  			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
>>  			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
>>  			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>> +	struct kgd_mem *mem;
>>  	void *kaddr;
>>  	int ret;
>>
>> @@ -784,15 +775,26 @@ static int
>> kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
>>
>>  	/* ib_base is only set for dGPU */
>>  	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
>> -				      &kaddr);
>> +				      &mem, &kaddr);
>>  	if (ret)
>>  		return ret;
>>
>> +	qpd->ib_mem = mem;
>>  	qpd->ib_kaddr = kaddr;
>>
>>  	return 0;
>>  }
>>
>> +static void kfd_process_device_destroy_ib_mem(struct
>> +kfd_process_device *pdd) {
>> +	struct qcm_process_device *qpd = &pdd->qpd;
>> +
>> +	if (!qpd->ib_kaddr || !qpd->ib_base)
>> +		return;
>> +
>> +	kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr); }
>> +
>>  struct kfd_process *kfd_create_process(struct file *filep)  {
>>  	struct kfd_process *process;
>> @@ -947,6 +949,52 @@ static void kfd_process_device_free_bos(struct
>kfd_process_device *pdd)
>>  	}
>>  }
>>
>> +static void kfd_process_free_signal_bo(struct kfd_process *p) {
>> +	struct kfd_process_device *pdd;
>> +	struct kfd_dev *kdev;
>> +	void *mem;
>> +	int i;
>> +
>> +	kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
>> +	if (!kdev)
>> +		return;
>> +
>> +	mutex_lock(&p->mutex);
>> +
>> +	pdd = kfd_get_process_device_data(kdev, p);
>> +	if (!pdd) {
>> +		mutex_unlock(&p->mutex);
>> +		return;
>> +	}
>> +
>> +	mem = kfd_process_device_translate_handle(
>> +		pdd, GET_IDR_HANDLE(p->signal_handle));
>> +	if (!mem) {
>> +		mutex_unlock(&p->mutex);
>> +		return;
>> +	}
>> +
>> +	mutex_unlock(&p->mutex);
>> +
>> +	for (i = 0; i < p->n_pdds; i++) {
>> +		struct kfd_process_device *peer_pdd = p->pdds[i];
>> +
>> +		if (!peer_pdd->drm_priv)
>> +			continue;
>> +		amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>> +				peer_pdd->dev->kgd, mem, peer_pdd-
>>drm_priv);
>> +	}
>> +
>> +	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd,
>mem);
>
>I think you only need to do the kunmap here. You can leave
>"unmap_memory_from_gpu" and "free_memory_of_gpu" and
>"remove_obj_handle"
>to be done in the regular kfd_process_free_outstanding_kfd_bos to avoid
>duplicating that code.

Good idea. Will just kunmap it here. 
>
>> +
>> +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem,
>> +		pdd->drm_priv, NULL);
>> +
>> +	kfd_process_device_remove_obj_handle(pdd,
>> +		GET_IDR_HANDLE(p->signal_handle));
>> +}
>> +
>>  static void kfd_process_free_outstanding_kfd_bos(struct kfd_process
>> *p)  {
>>  	int i;
>> @@ -965,6 +1013,9 @@ static void kfd_process_destroy_pdds(struct
>kfd_process *p)
>>  		pr_debug("Releasing pdd (topology id %d) for process (pasid
>0x%x)\n",
>>  				pdd->dev->id, p->pasid);
>>
>> +		kfd_process_device_destroy_cwsr_dgpu(pdd);
>> +		kfd_process_device_destroy_ib_mem(pdd);
>> +
>>  		if (pdd->drm_file) {
>>  			amdgpu_amdkfd_gpuvm_release_process_vm(
>>  					pdd->dev->kgd, pdd->drm_priv);
>> @@ -1049,9 +1100,11 @@ static void kfd_process_wq_release(struct
>> work_struct *work)  {
>>  	struct kfd_process *p = container_of(work, struct kfd_process,
>>  					     release_work);
>> +
>>  	kfd_process_remove_sysfs(p);
>>  	kfd_iommu_unbind_process(p);
>>
>> +	kfd_process_free_signal_bo(p);
>>  	kfd_process_free_outstanding_kfd_bos(p);
>>  	svm_range_list_fini(p);
>>
>> @@ -1066,6 +1119,7 @@ static void kfd_process_wq_release(struct
>work_struct *work)
>>  	put_task_struct(p->lead_thread);
>>
>>  	kfree(p);
>> +
>
>Unnecessary, trailing whitespace.

Will remove it.

Regards,
Lang

>
>Regards,
>  Felix
>
>
>>  }
>>
>>  static void kfd_process_ref_release(struct kref *ref) @@ -1198,6
>> +1252,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct
>kfd_process_device *pdd)
>>  	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
>>  			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
>>  			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>> +	struct kgd_mem *mem;
>>  	void *kaddr;
>>  	int ret;
>>
>> @@ -1206,10 +1261,11 @@ static int
>> kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>>
>>  	/* cwsr_base is only set for dGPU */
>>  	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
>> -				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
>> +				      KFD_CWSR_TBA_TMA_SIZE, flags, &mem,
>&kaddr);
>>  	if (ret)
>>  		return ret;
>>
>> +	qpd->cwsr_mem = mem;
>>  	qpd->cwsr_kaddr = kaddr;
>>  	qpd->tba_addr = qpd->cwsr_base;
>>
>> @@ -1222,6 +1278,17 @@ static int
>kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>>  	return 0;
>>  }
>>
>> +static void kfd_process_device_destroy_cwsr_dgpu(struct
>> +kfd_process_device *pdd) {
>> +	struct kfd_dev *dev = pdd->dev;
>> +	struct qcm_process_device *qpd = &pdd->qpd;
>> +
>> +	if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
>> +		return;
>> +
>> +	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr); }
>> +
>>  void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
>>  				  uint64_t tba_addr,
>>  				  uint64_t tma_addr)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdkfd: Separate pinned BOs destruction from general routine
  2021-10-14 10:14   ` Yu, Lang
@ 2021-10-14 17:14     ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2021-10-14 17:14 UTC (permalink / raw)
  To: Yu, Lang, Kuehling, Felix, amd-gfx
  Cc: Koenig, Christian, Deucher, Alexander, Huang, Ray

Am 14.10.21 um 12:14 schrieb Yu, Lang:
> [AMD Official Use Only]
>
>
>
>> -----Original Message-----
>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>> Sent: Wednesday, October 13, 2021 11:25 PM
>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Koenig, Christian <Christian.Koenig@amd.com>; Deucher, Alexander
>> <Alexander.Deucher@amd.com>; Huang, Ray <Ray.Huang@amd.com>
>> Subject: Re: [PATCH] drm/amdkfd: Separate pinned BOs destruction from
>> general routine
>>
>> Am 2021-10-11 um 4:58 a.m. schrieb Lang Yu:
>>> Currently, all kfd BOs use same destruction routine. But pinned BOs
>>> are not unpinned properly. Separate them from general routine.
>>>
>>> Signed-off-by: Lang Yu <lang.yu@amd.com>
>> I think the general idea is right. However, we need another safeguard for the
>> signal BO, which is allocated by user mode and can be freed by user mode at
>> any time. We can solve this in one of two ways:
>>
>> 1. Add special handling for the signal BO in
>>     kfd_ioctl_free_memory_of_gpu to kunmap the BO and make sure the
>>     signal handling code is aware of it
>> 2. Fail kfd_ioctl_free_memory_of_gpu for signal BOs and only allow them
>>     to be destroyed at process termination
>>
>> I think #2 is easier, and is consistent with what current user mode does.
> Will add safeguard to prevent that according to #2.

Well, exactly that are the things why upstream people insisted on this :)

Sounds like the best solution to me as well.

Thanks for taking care of this,
Christian.

>   
>> A few more comment inline ...
>>
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   2 +
>>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  10 ++
>>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |   3 +
>>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 +
>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 125 ++++++++++++++---
>> -
>>>   5 files changed, 114 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 69de31754907..751557af09bb 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -279,6 +279,8 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>>>   		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);  int
>>> amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
>>>   		struct kgd_mem *mem, void **kptr, uint64_t *size);
>>> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
>> kgd_dev
>>> +*kgd, struct kgd_mem *mem);
>>> +
>>>   int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>>>   					    struct dma_fence **ef);
>>>   int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, diff
>>> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 054c1a224def..6acc78b02bdc 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -1871,6 +1871,16 @@ int
>> amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
>>>   	return ret;
>>>   }
>>>
>>> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct
>> kgd_dev
>>> +*kgd, struct kgd_mem *mem) {
>>> +	struct amdgpu_bo *bo = mem->bo;
>>> +
>>> +	amdgpu_bo_reserve(bo, true);
>>> +	amdgpu_bo_kunmap(bo);
>>> +	amdgpu_bo_unpin(bo);
>>> +	amdgpu_bo_unreserve(bo);
>>> +}
>>> +
>>>   int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
>>>   					      struct kfd_vm_fault_info *mem)
>> { diff --git
>>> a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index f1e7edeb4e6b..0db48ac10fde 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1051,6 +1051,9 @@ static int kfd_ioctl_create_event(struct file *filp,
>> struct kfd_process *p,
>>>   			pr_err("Failed to set event page\n");
>> Need to kunmap the signal BO here.
> Will kunmap it here.
>
>>>   			return err;
>>>   		}
>>> +
>>> +		p->signal_handle = args->event_page_offset;
>>> +
>>>   	}
>>>
>>>   	err = kfd_event_create(filp, p, args->event_type, diff --git
>>> a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 6d8f9bb2d905..30f08f1606bb 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -608,12 +608,14 @@ struct qcm_process_device {
>>>   	uint32_t sh_hidden_private_base;
>>>
>>>   	/* CWSR memory */
>>> +	struct kgd_mem *cwsr_mem;
>>>   	void *cwsr_kaddr;
>>>   	uint64_t cwsr_base;
>>>   	uint64_t tba_addr;
>>>   	uint64_t tma_addr;
>>>
>>>   	/* IB memory */
>>> +	struct kgd_mem *ib_mem;
>>>   	uint64_t ib_base;
>>>   	void *ib_kaddr;
>>>
>>> @@ -808,6 +810,7 @@ struct kfd_process {
>>>   	/* Event ID allocator and lookup */
>>>   	struct idr event_idr;
>>>   	/* Event page */
>>> +	u64 signal_handle;
>>>   	struct kfd_signal_page *signal_page;
>>>   	size_t signal_mapped_size;
>>>   	size_t signal_event_count;
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index 21ec8a18cad2..c024f2e2efaa 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct
>>> kfd_process *p, struct file *filep);  static void
>>> evict_process_worker(struct work_struct *work);  static void
>>> restore_process_worker(struct work_struct *work);
>>>
>>> +static void kfd_process_device_destroy_cwsr_dgpu(struct
>>> +kfd_process_device *pdd);
>>> +
>>>   struct kfd_procfs_tree {
>>>   	struct kobject *kobj;
>>>   };
>>> @@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void)  }
>>>
>>>   static void kfd_process_free_gpuvm(struct kgd_mem *mem,
>>> -			struct kfd_process_device *pdd)
>>> +			struct kfd_process_device *pdd, void *kptr)
>>>   {
>>>   	struct kfd_dev *dev = pdd->dev;
>>>
>>> +	if (kptr) {
>>> +		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev-
>>> kgd, mem);
>>> +		kptr = NULL;
>>> +	}
>>> +
>>>   	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd,
>> mem, pdd->drm_priv);
>>>   	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem,
>> pdd->drm_priv,
>>>   					       NULL);
>>> @@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct
>> kgd_mem *mem,
>>>    */
>>>   static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
>>>   				   uint64_t gpu_va, uint32_t size,
>>> -				   uint32_t flags, void **kptr)
>>> +				   uint32_t flags, struct kgd_mem **mem, void
>> **kptr)
>>>   {
>>>   	struct kfd_dev *kdev = pdd->dev;
>>> -	struct kgd_mem *mem = NULL;
>>> -	int handle;
>>>   	int err;
>>>
>>>   	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd,
>> gpu_va, size,
>>> -						 pdd->drm_priv, &mem, NULL,
>> flags);
>>> +						 pdd->drm_priv, mem, NULL,
>> flags);
>>>   	if (err)
>>>   		goto err_alloc_mem;
>>>
>>> -	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd,
>> mem,
>>> +	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd,
>> *mem,
>>>   			pdd->drm_priv, NULL);
>>>   	if (err)
>>>   		goto err_map_mem;
>>>
>>> -	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem,
>> true);
>>> +	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem,
>> true);
>>>   	if (err) {
>>>   		pr_debug("Sync memory failed, wait interrupted by user
>> signal\n");
>>>   		goto sync_memory_failed;
>>>   	}
>>>
>>> -	/* Create an obj handle so kfd_process_device_remove_obj_handle
>>> -	 * will take care of the bo removal when the process finishes.
>>> -	 * We do not need to take p->mutex, because the process is just
>>> -	 * created and the ioctls have not had the chance to run.
>>> -	 */
>>> -	handle = kfd_process_device_create_obj_handle(pdd, mem);
>>> -
>>> -	if (handle < 0) {
>>> -		err = handle;
>>> -		goto free_gpuvm;
>>> -	}
>>> -
>>>   	if (kptr) {
>>>   		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev-
>>> kgd,
>>> -				(struct kgd_mem *)mem, kptr, NULL);
>>> +				(struct kgd_mem *)*mem, kptr, NULL);
>>>   		if (err) {
>>>   			pr_debug("Map GTT BO to kernel failed\n");
>>> -			goto free_obj_handle;
>>> +			goto sync_memory_failed;
>>>   		}
>>>   	}
>>>
>>>   	return err;
>>>
>>> -free_obj_handle:
>>> -	kfd_process_device_remove_obj_handle(pdd, handle);
>>> -free_gpuvm:
>>>   sync_memory_failed:
>>> -	kfd_process_free_gpuvm(mem, pdd);
>>> -	return err;
>>> +	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd,
>> *mem,
>>> +pdd->drm_priv);
>>>
>>>   err_map_mem:
>>> -	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem,
>> pdd->drm_priv,
>>> +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem,
>>> +pdd->drm_priv,
>>>   					       NULL);
>>>   err_alloc_mem:
>>> +	*mem = NULL;
>>>   	*kptr = NULL;
>>>   	return err;
>>>   }
>>> @@ -776,6 +766,7 @@ static int
>> kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
>>>   			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
>>>   			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
>>>   			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>>> +	struct kgd_mem *mem;
>>>   	void *kaddr;
>>>   	int ret;
>>>
>>> @@ -784,15 +775,26 @@ static int
>>> kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
>>>
>>>   	/* ib_base is only set for dGPU */
>>>   	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
>>> -				      &kaddr);
>>> +				      &mem, &kaddr);
>>>   	if (ret)
>>>   		return ret;
>>>
>>> +	qpd->ib_mem = mem;
>>>   	qpd->ib_kaddr = kaddr;
>>>
>>>   	return 0;
>>>   }
>>>
>>> +static void kfd_process_device_destroy_ib_mem(struct
>>> +kfd_process_device *pdd) {
>>> +	struct qcm_process_device *qpd = &pdd->qpd;
>>> +
>>> +	if (!qpd->ib_kaddr || !qpd->ib_base)
>>> +		return;
>>> +
>>> +	kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr); }
>>> +
>>>   struct kfd_process *kfd_create_process(struct file *filep)  {
>>>   	struct kfd_process *process;
>>> @@ -947,6 +949,52 @@ static void kfd_process_device_free_bos(struct
>> kfd_process_device *pdd)
>>>   	}
>>>   }
>>>
>>> +static void kfd_process_free_signal_bo(struct kfd_process *p) {
>>> +	struct kfd_process_device *pdd;
>>> +	struct kfd_dev *kdev;
>>> +	void *mem;
>>> +	int i;
>>> +
>>> +	kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
>>> +	if (!kdev)
>>> +		return;
>>> +
>>> +	mutex_lock(&p->mutex);
>>> +
>>> +	pdd = kfd_get_process_device_data(kdev, p);
>>> +	if (!pdd) {
>>> +		mutex_unlock(&p->mutex);
>>> +		return;
>>> +	}
>>> +
>>> +	mem = kfd_process_device_translate_handle(
>>> +		pdd, GET_IDR_HANDLE(p->signal_handle));
>>> +	if (!mem) {
>>> +		mutex_unlock(&p->mutex);
>>> +		return;
>>> +	}
>>> +
>>> +	mutex_unlock(&p->mutex);
>>> +
>>> +	for (i = 0; i < p->n_pdds; i++) {
>>> +		struct kfd_process_device *peer_pdd = p->pdds[i];
>>> +
>>> +		if (!peer_pdd->drm_priv)
>>> +			continue;
>>> +		amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>> +				peer_pdd->dev->kgd, mem, peer_pdd-
>>> drm_priv);
>>> +	}
>>> +
>>> +	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd,
>> mem);
>>
>> I think you only need to do the kunmap here. You can leave
>> "unmap_memory_from_gpu" and "free_memory_of_gpu" and
>> "remove_obj_handle"
>> to be done in the regular kfd_process_free_outstanding_kfd_bos to avoid
>> duplicating that code.
> Good idea. Will just kunmap it here.
>>> +
>>> +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem,
>>> +		pdd->drm_priv, NULL);
>>> +
>>> +	kfd_process_device_remove_obj_handle(pdd,
>>> +		GET_IDR_HANDLE(p->signal_handle));
>>> +}
>>> +
>>>   static void kfd_process_free_outstanding_kfd_bos(struct kfd_process
>>> *p)  {
>>>   	int i;
>>> @@ -965,6 +1013,9 @@ static void kfd_process_destroy_pdds(struct
>> kfd_process *p)
>>>   		pr_debug("Releasing pdd (topology id %d) for process (pasid
>> 0x%x)\n",
>>>   				pdd->dev->id, p->pasid);
>>>
>>> +		kfd_process_device_destroy_cwsr_dgpu(pdd);
>>> +		kfd_process_device_destroy_ib_mem(pdd);
>>> +
>>>   		if (pdd->drm_file) {
>>>   			amdgpu_amdkfd_gpuvm_release_process_vm(
>>>   					pdd->dev->kgd, pdd->drm_priv);
>>> @@ -1049,9 +1100,11 @@ static void kfd_process_wq_release(struct
>>> work_struct *work)  {
>>>   	struct kfd_process *p = container_of(work, struct kfd_process,
>>>   					     release_work);
>>> +
>>>   	kfd_process_remove_sysfs(p);
>>>   	kfd_iommu_unbind_process(p);
>>>
>>> +	kfd_process_free_signal_bo(p);
>>>   	kfd_process_free_outstanding_kfd_bos(p);
>>>   	svm_range_list_fini(p);
>>>
>>> @@ -1066,6 +1119,7 @@ static void kfd_process_wq_release(struct
>> work_struct *work)
>>>   	put_task_struct(p->lead_thread);
>>>
>>>   	kfree(p);
>>> +
>> Unnecessary, trailing whitespace.
> Will remove it.
>
> Regards,
> Lang
>
>> Regards,
>>    Felix
>>
>>
>>>   }
>>>
>>>   static void kfd_process_ref_release(struct kref *ref) @@ -1198,6
>>> +1252,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct
>> kfd_process_device *pdd)
>>>   	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
>>>   			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
>>>   			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>>> +	struct kgd_mem *mem;
>>>   	void *kaddr;
>>>   	int ret;
>>>
>>> @@ -1206,10 +1261,11 @@ static int
>>> kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>>>
>>>   	/* cwsr_base is only set for dGPU */
>>>   	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
>>> -				      KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
>>> +				      KFD_CWSR_TBA_TMA_SIZE, flags, &mem,
>> &kaddr);
>>>   	if (ret)
>>>   		return ret;
>>>
>>> +	qpd->cwsr_mem = mem;
>>>   	qpd->cwsr_kaddr = kaddr;
>>>   	qpd->tba_addr = qpd->cwsr_base;
>>>
>>> @@ -1222,6 +1278,17 @@ static int
>> kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>>>   	return 0;
>>>   }
>>>
>>> +static void kfd_process_device_destroy_cwsr_dgpu(struct
>>> +kfd_process_device *pdd) {
>>> +	struct kfd_dev *dev = pdd->dev;
>>> +	struct qcm_process_device *qpd = &pdd->qpd;
>>> +
>>> +	if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
>>> +		return;
>>> +
>>> +	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr); }
>>> +
>>>   void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
>>>   				  uint64_t tba_addr,
>>>   				  uint64_t tma_addr)


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-10-14 17:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-11  8:58 [PATCH] drm/amdkfd: Separate pinned BOs destruction from general routine Lang Yu
2021-10-13 15:24 ` Felix Kuehling
2021-10-14 10:14   ` Yu, Lang
2021-10-14 17:14     ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.