* [PATCH 1/3] drm/amdgpu: extract a helper to import dmabuf more flexibly
From: Lang Yu @ 2022-07-25 10:32 UTC
  To: amd-gfx
  Cc: Alex Deucher, Felix Kuehling, Huang Rui, Lang Yu, Christian Koenig

Add a helper for clients (e.g., KFD) that want to decide by
themselves whether to create a buffer object, especially when
importing a gfx BO based dmabuf.
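
For example, a client is then expected to do roughly:

	gobj = amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
	if (IS_ERR(gobj))
		return PTR_ERR(gobj);
	/* gobj->import_attach now holds the dynamic attachment and
	 * the helper took its own dma_buf reference; both are
	 * released again when the last GEM object reference is
	 * dropped.
	 */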

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 38 +++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |  2 ++
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 579adfafe4d0..83bbf54d5562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -429,6 +429,28 @@ static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
 	.move_notify = amdgpu_dma_buf_move_notify
 };
 
+struct drm_gem_object *
+amdgpu_dma_buf_create_obj_and_attach(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attach;
+	struct drm_gem_object *obj;
+
+	obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
+					&amdgpu_dma_buf_attach_ops, obj);
+	if (IS_ERR(attach)) {
+		drm_gem_object_put(obj);
+		return ERR_CAST(attach);
+	}
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
+}
+
 /**
  * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
  * @dev: DRM device
@@ -442,7 +464,6 @@ static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 					       struct dma_buf *dma_buf)
 {
-	struct dma_buf_attachment *attach;
 	struct drm_gem_object *obj;
 
 	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -457,20 +478,7 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 		}
 	}
 
-	obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
-	if (IS_ERR(obj))
-		return obj;
-
-	attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
-					&amdgpu_dma_buf_attach_ops, obj);
-	if (IS_ERR(attach)) {
-		drm_gem_object_put(obj);
-		return ERR_CAST(attach);
-	}
-
-	get_dma_buf(dma_buf);
-	obj->import_attach = attach;
-	return obj;
+	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index 3e93b9b407a9..3b89e3af7c06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -27,6 +27,8 @@
 
 struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
 					int flags);
+struct drm_gem_object*
+amdgpu_dma_buf_create_obj_and_attach(struct drm_device *dev, struct dma_buf *dma_buf);
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 					    struct dma_buf *dma_buf);
 bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
-- 
2.25.1



* [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
From: Lang Yu @ 2022-07-25 10:32 UTC
  To: amd-gfx
  Cc: Alex Deucher, Felix Kuehling, Huang Rui, Lang Yu, Christian Koenig

We have a memory leak issue in the current implementation, i.e.,
the allocated struct kgd_mem memory is not handled properly.

The idea is to always create a buffer object when importing
a gfx BO based dmabuf.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
 1 file changed, 70 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b3806ebe5ef7..c1855b72a3f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 	u32 alloc_flags = bo->kfd_bo->alloc_flags;
 	u64 size = amdgpu_bo_size(bo);
 
-	unreserve_mem_limit(adev, size, alloc_flags);
+	if (!bo->kfd_bo->is_imported)
+		unreserve_mem_limit(adev, size, alloc_flags);
 
 	kfree(bo->kfd_bo);
 }
@@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	}
 }
 
+static struct drm_gem_object*
+amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+	struct drm_gem_object *gobj;
+	struct amdgpu_bo *abo;
+
+	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+		gobj = dma_buf->priv;
+		abo = gem_to_amdgpu_bo(gobj);
+		if (gobj->dev == dev && abo->kfd_bo) {
+			drm_gem_object_get(gobj);
+			return gobj;
+		}
+	}
+
+	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
+}
+
 static int
 kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 		      struct amdgpu_bo **bo)
@@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 		}
 	}
 
-	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
+	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
 	if (IS_ERR(gobj))
 		return PTR_ERR(gobj);
 
@@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
+	bool is_imported = false;
+	struct drm_gem_object *imported_gobj;
 	struct kfd_mem_attachment *entry, *tmp;
 	struct bo_vm_reservation_context ctx;
 	struct ttm_validate_buffer *bo_list_entry;
 	unsigned int mapped_to_gpu_memory;
 	int ret;
-	bool is_imported = false;
 
 	mutex_lock(&mem->lock);
 
@@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	}
 
 	/* Free the BO*/
-	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+	if (!is_imported) {
+		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+	} else {
+		imported_gobj = mem->dmabuf->priv;
+		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
+	}
+
 	if (mem->dmabuf)
 		dma_buf_put(mem->dmabuf);
 	mutex_destroy(&mem->lock);
@@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
 				      uint64_t *mmap_offset)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-	struct drm_gem_object *obj;
-	struct amdgpu_bo *bo;
+	struct drm_gem_object *imported_gobj, *gobj;
+	struct amdgpu_bo *imported_bo, *bo;
 	int ret;
 
-	if (dma_buf->ops != &amdgpu_dmabuf_ops)
-		/* Can't handle non-graphics buffers */
+	/*
+	 * Can't handle non-graphics buffers and
+	 * buffers from other devices
+	 */
+	imported_gobj = dma_buf->priv;
+	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
+	    drm_to_adev(imported_gobj->dev) != adev)
 		return -EINVAL;
 
-	obj = dma_buf->priv;
-	if (drm_to_adev(obj->dev) != adev)
-		/* Can't handle buffers from other devices */
+	/* Only VRAM and GTT BOs are supported */
+	imported_bo = gem_to_amdgpu_bo(imported_gobj);
+	if (!(imported_bo->preferred_domains &
+	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
 		return -EINVAL;
 
-	bo = gem_to_amdgpu_bo(obj);
-	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-				    AMDGPU_GEM_DOMAIN_GTT)))
-		/* Only VRAM and GTT BOs are supported */
-		return -EINVAL;
+	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
+	if (ret)
+		return ret;
 
-	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem)
-		return -ENOMEM;
+	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
+	if (IS_ERR(gobj)) {
+		ret = PTR_ERR(gobj);
+		goto err_import;
+	}
 
-	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
-	if (ret) {
-		kfree(mem);
-		return ret;
+	bo = gem_to_amdgpu_bo(gobj);
+	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem) {
+		ret = -ENOMEM;
+		goto err_alloc_mem;
 	}
 
 	if (size)
-		*size = amdgpu_bo_size(bo);
+		*size = amdgpu_bo_size(imported_bo);
 
 	if (mmap_offset)
-		*mmap_offset = amdgpu_bo_mmap_offset(bo);
+		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
 
 	INIT_LIST_HEAD(&(*mem)->attachments);
 	mutex_init(&(*mem)->lock);
 
 	(*mem)->alloc_flags =
-		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
 		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
 		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
 		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
 
-	drm_gem_object_get(&bo->tbo.base);
+	get_dma_buf(dma_buf);
+	(*mem)->dmabuf = dma_buf;
 	(*mem)->bo = bo;
 	(*mem)->va = va;
-	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
-		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
 	(*mem)->mapped_to_gpu_memory = 0;
 	(*mem)->process_info = avm->process_info;
 	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
 	amdgpu_sync_create(&(*mem)->sync);
 	(*mem)->is_imported = true;
+	bo->kfd_bo = *mem;
 
 	return 0;
+err_import:
+	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
+err_alloc_mem:
+	drm_gem_object_put(gobj);
+	return ret;
 }
 
 /* Evict a userptr BO by stopping the queues if necessary
-- 
2.25.1



* [PATCH 3/3] drm/amdkfd: remove an unnecessary amdgpu_bo_ref
From: Lang Yu @ 2022-07-25 10:32 UTC
  To: amd-gfx
  Cc: Alex Deucher, Felix Kuehling, Huang Rui, Lang Yu, Christian Koenig

No need to reference the BO here; the dmabuf framework will handle that.
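
For illustration, a simplified view of kfd_mem_attach_dmabuf() after
the previous patch:

	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
	...
	*bo = gem_to_amdgpu_bo(gobj);
	/* amdgpu_amdkfd_import() already holds a reference that keeps
	 * the exporter's BO alive (a GEM reference in the same-device
	 * case, a dma_buf reference otherwise), so the extra
	 * amdgpu_bo_ref() is unnecessary.
	 */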

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index c1855b72a3f0..802c762108b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -827,7 +827,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 
 	*bo = gem_to_amdgpu_bo(gobj);
 	(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
-	(*bo)->parent = amdgpu_bo_ref(mem->bo);
 
 	return 0;
 }
-- 
2.25.1



* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
From: Felix Kuehling @ 2022-07-25 14:20 UTC
  To: Lang Yu, amd-gfx; +Cc: Alex Deucher, Huang Rui, Christian Koenig


On 2022-07-25 06:32, Lang Yu wrote:
> We have a memory leak issue in the current implementation, i.e.,
> the allocated struct kgd_mem memory is not handled properly.
>
> The idea is to always create a buffer object when importing
> a gfx BO based dmabuf.
>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
>   1 file changed, 70 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b3806ebe5ef7..c1855b72a3f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>   	u32 alloc_flags = bo->kfd_bo->alloc_flags;
>   	u64 size = amdgpu_bo_size(bo);
>   
> -	unreserve_mem_limit(adev, size, alloc_flags);
> +	if (!bo->kfd_bo->is_imported)
> +		unreserve_mem_limit(adev, size, alloc_flags);
>   
>   	kfree(bo->kfd_bo);
>   }
> @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
>   	}
>   }
>   
> +static struct drm_gem_object*
> +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
> +{
> +	struct drm_gem_object *gobj;
> +	struct amdgpu_bo *abo;
> +
> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {

I'd rather remove this limitation. We should be able to use any DMABuf 
with KFD. This check was added when we basically sidestepped all the 
dmabuf attachment code and just extracted the amdgpu_bo ourselves. I 
don't think we want to keep doing that.

Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf 
import" sent to amd-gfx on March 16. I'm about to send an updated 
version of this as part of upstream RDMA support soon.

Regards,
   Felix


* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
From: Lang Yu @ 2022-07-26  0:15 UTC
  To: Felix Kuehling; +Cc: Alex Deucher, Huang Rui, Christian Koenig, amd-gfx

On 2022-07-25, Felix Kuehling wrote:
> 
> On 2022-07-25 06:32, Lang Yu wrote:
> > We have a memory leak issue in the current implementation, i.e.,
> > the allocated struct kgd_mem memory is not handled properly.
> > 
> > The idea is to always create a buffer object when importing
> > a gfx BO based dmabuf.
> > 
> > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > ---
> >   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
> >   1 file changed, 70 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index b3806ebe5ef7..c1855b72a3f0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> >   	u32 alloc_flags = bo->kfd_bo->alloc_flags;
> >   	u64 size = amdgpu_bo_size(bo);
> > -	unreserve_mem_limit(adev, size, alloc_flags);
> > +	if (!bo->kfd_bo->is_imported)
> > +		unreserve_mem_limit(adev, size, alloc_flags);
> >   	kfree(bo->kfd_bo);
> >   }
> > @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> >   	}
> >   }
> > +static struct drm_gem_object*
> > +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
> > +{
> > +	struct drm_gem_object *gobj;
> > +	struct amdgpu_bo *abo;
> > +
> > +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> 
> I'd rather remove this limitation. We should be able to use any DMABuf with
> KFD. This check was added when we basically sidestepped all the dmabuf
> attachment code and just extracted the amdgpu_bo ourselves. I don't think we
> want to keep doing that.

This limitation is here to just reference the gobj if it is an amdgpu
BO from the same device, instead of creating a new gobj when importing
a dmabuf.

> Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
> import" sent to amd-gfx on March 16. I'm about to send an updated version of
> this as part of upstream RDMA support soon.

The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
memory leak issue. Looking forward to the updated version. Thanks!

Regards,
Lang


* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
From: Felix Kuehling @ 2022-07-26  0:32 UTC
  To: Lang Yu; +Cc: Alex Deucher, Huang Rui, Christian Koenig, amd-gfx

On 2022-07-25 20:15, Lang Yu wrote:
> On 2022-07-25, Felix Kuehling wrote:
>> On 2022-07-25 06:32, Lang Yu wrote:
>>> We have a memory leak issue in the current implementation, i.e.,
>>> the allocated struct kgd_mem memory is not handled properly.
>>>
>>> The idea is to always create a buffer object when importing
>>> a gfx BO based dmabuf.
>>>
>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>> ---
>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
>>>    1 file changed, 70 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index b3806ebe5ef7..c1855b72a3f0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>>>    	u32 alloc_flags = bo->kfd_bo->alloc_flags;
>>>    	u64 size = amdgpu_bo_size(bo);
>>> -	unreserve_mem_limit(adev, size, alloc_flags);
>>> +	if (!bo->kfd_bo->is_imported)
>>> +		unreserve_mem_limit(adev, size, alloc_flags);
>>>    	kfree(bo->kfd_bo);
>>>    }
>>> @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
>>>    	}
>>>    }
>>> +static struct drm_gem_object*
>>> +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
>>> +{
>>> +	struct drm_gem_object *gobj;
>>> +	struct amdgpu_bo *abo;
>>> +
>>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>> I'd rather remove this limitation. We should be able to use any DMABuf with
>> KFD. This check was added when we basically sidestepped all the dmabuf
>> attachment code and just extracted the amdgpu_bo ourselves. I don't think we
>> want to keep doing that.
> This limitation is here to just reference the gobj if it is an amdgpu
> BO from the same device, instead of creating a new gobj when importing
> a dmabuf.
>
>> Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
>> import" sent to amd-gfx on March 16. I'm about to send an updated version of
>> this as part of upstream RDMA support soon.
> The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
> memory leak issue. Looking forward to the updated version. Thanks!

Maybe we're trying to fix different problems. Can you give a more 
detailed explanation of the memory leak you're seeing? It's not obvious 
to me.

The problem I'm trying to solve is to ensure that each exported BO only 
has a single dmabuf, because we run into problems with GEM if we have 
multiple dmabufs pointing to the same GEM object. That also enables 
re-exporting dmabufs of imported BOs. At the same time I'm removing any 
limitations on the types of BOs we can import, and trying to eliminate 
any custom dmabuf handling in KFD.

Regards,
   Felix


* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
From: Lang Yu @ 2022-07-26  0:40 UTC
  To: Felix Kuehling; +Cc: Alex Deucher, Huang Rui, Christian Koenig, amd-gfx

On 2022-07-25, Felix Kuehling wrote:
> On 2022-07-25 20:15, Lang Yu wrote:
> > On 2022-07-25, Felix Kuehling wrote:
> > > On 2022-07-25 06:32, Lang Yu wrote:
> > > > We have a memory leak issue in the current implementation, i.e.,
> > > > the allocated struct kgd_mem memory is not handled properly.
> > > > 
> > > > The idea is to always create a buffer object when importing
> > > > a gfx BO based dmabuf.
> > > > 
> > > > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > > > ---
> > > >    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
> > > >    1 file changed, 70 insertions(+), 29 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > index b3806ebe5ef7..c1855b72a3f0 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> > > >    	u32 alloc_flags = bo->kfd_bo->alloc_flags;
> > > >    	u64 size = amdgpu_bo_size(bo);
> > > > -	unreserve_mem_limit(adev, size, alloc_flags);
> > > > +	if (!bo->kfd_bo->is_imported)
> > > > +		unreserve_mem_limit(adev, size, alloc_flags);
> > > >    	kfree(bo->kfd_bo);
> > > >    }
> > > > @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> > > >    	}
> > > >    }
> > > > +static struct drm_gem_object*
> > > > +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
> > > > +{
> > > > +	struct drm_gem_object *gobj;
> > > > +	struct amdgpu_bo *abo;
> > > > +
> > > > +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> > > I'd rather remove this limitation. We should be able to use any DMABuf with
> > > KFD. This check was added when we basically sidestepped all the dmabuf
> > > attachment code and just extracted the amdgpu_bo ourselves. I don't think we
> > > want to keep doing that.
> > This limitation is here to just reference the gobj if it is an amdgpu
> > BO from the same device, instead of creating a new gobj when importing
> > a dmabuf.
> > 
> > > Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
> > > import" sent to amd-gfx on March 16. I'm about to send an updated version of
> > > this as part of upstream RDMA support soon.
> > The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
> > memory leak issue. Looking forward to the updated version. Thanks!
> 
> Maybe we're trying to fix different problems. Can you give a more detailed
> explanation of the memory leak you're seeing? It's not obvious to me.

The struct kgd_mem is allocated by "*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);",
but it is never assigned to bo->kfd_bo (i.e., "bo->kfd_bo = *mem;"), so *mem will
never be freed.
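
Roughly (a simplified sketch of the current paths):

	/* amdgpu_amdkfd_gpuvm_import_dmabuf() */
	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	...
	drm_gem_object_get(&bo->tbo.base);
	/* bo->kfd_bo is never set to *mem here */

	/* amdgpu_amdkfd_release_notify(), when the BO is released */
	kfree(bo->kfd_bo);
	/* this only ever frees bo->kfd_bo; the kgd_mem allocated
	 * for the import above is never freed
	 */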

Regards,
Lang


* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
From: Felix Kuehling @ 2022-07-26  1:48 UTC
  To: Lang Yu; +Cc: Alex Deucher, Huang Rui, Christian Koenig, amd-gfx

On 2022-07-25 20:40, Lang Yu wrote:
> On 2022-07-25, Felix Kuehling wrote:
>> On 2022-07-25 20:15, Lang Yu wrote:
>>> On 2022-07-25, Felix Kuehling wrote:
>>>> On 2022-07-25 06:32, Lang Yu wrote:
>>>>> We have a memory leak issue in the current implementation, i.e.,
>>>>> the allocated struct kgd_mem memory is not handled properly.
>>>>>
>>>>> The idea is to always create a buffer object when importing
>>>>> a gfx BO based dmabuf.
>>>>>
>>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>>> ---
>>>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
>>>>>     1 file changed, 70 insertions(+), 29 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index b3806ebe5ef7..c1855b72a3f0 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>>>>>     	u32 alloc_flags = bo->kfd_bo->alloc_flags;
>>>>>     	u64 size = amdgpu_bo_size(bo);
>>>>> -	unreserve_mem_limit(adev, size, alloc_flags);
>>>>> +	if (!bo->kfd_bo->is_imported)
>>>>> +		unreserve_mem_limit(adev, size, alloc_flags);
>>>>>     	kfree(bo->kfd_bo);
>>>>>     }
>>>>> @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
>>>>>     	}
>>>>>     }
>>>>> +static struct drm_gem_object*
>>>>> +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
>>>>> +{
>>>>> +	struct drm_gem_object *gobj;
>>>>> +	struct amdgpu_bo *abo;
>>>>> +
>>>>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>> I'd rather remove this limitation. We should be able to use any DMABuf with
>>>> KFD. This check was added when we basically sidestepped all the dmabuf
>>>> attachment code and just extracted the amdgpu_bo ourselves. I don't think we
>>>> want to keep doing that.
>>> This limitation is here to just reference the gobj if it is an amdgpu
>>> BO from the same device, instead of creating a new gobj when importing
>>> a dmabuf.
>>>
>>>> Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
>>>> import" sent to amd-gfx on March 16. I'm about to send an updated version of
>>>> this as part of upstream RDMA support soon.
>>> The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
>>> memory leak issue. Looking forward to the updated version. Thanks!
>> Maybe we're trying to fix different problems. Can you give a more detailed
>> explanation of the memory leak you're seeing? It's not obvious to me.
> The struct kgd_mem is allocated by "*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);",
> but it is never assigned to bo->kfd_bo (i.e., "bo->kfd_bo = *mem;"), so *mem will
> never be freed.

True. With the current upstream driver there is no way this can happen, 
because we don't have an API for KFD to export a dmabuf in a way that 
could later be imported. But with the RDMA and IPC features we're 
working on, this becomes a real possibility.

Your solution is to ensure that the dmabuf import always creates a new 
amdgpu_bo. But that can add a lot of overhead. How about this idea: In 
amdgpu_amdkfd_gpuvm_free_memory_of_gpu we could add this after 
drm_gem_object_put:

	if (mem->bo->kfd_bo != mem)
		kfree(mem);

That way amdgpu_amdkfd_release_notify would only be responsible for 
freeing the original kgd_mem. Any imports will be freed explicitly.

Regards,
   Felix


* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
From: Lang Yu @ 2022-07-26  2:18 UTC
  To: Felix Kuehling; +Cc: Alex Deucher, Huang Rui, Christian Koenig, amd-gfx

On 2022-07-25, Felix Kuehling wrote:
> On 2022-07-25 20:40, Lang Yu wrote:
> > On 2022-07-25, Felix Kuehling wrote:
> > > On 2022-07-25 20:15, Lang Yu wrote:
> > > > On 2022-07-25, Felix Kuehling wrote:
> > > > > On 2022-07-25 06:32, Lang Yu wrote:
> > > > > > We have a memory leak issue in the current implementation, i.e.,
> > > > > > the allocated struct kgd_mem memory is not handled properly.
> > > > > > 
> > > > > > The idea is to always create a buffer object when importing
> > > > > > a gfx BO based dmabuf.
> > > > > > 
> > > > > > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > > > > > ---
> > > > > >     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
> > > > > >     1 file changed, 70 insertions(+), 29 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > index b3806ebe5ef7..c1855b72a3f0 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> > > > > >     	u32 alloc_flags = bo->kfd_bo->alloc_flags;
> > > > > >     	u64 size = amdgpu_bo_size(bo);
> > > > > > -	unreserve_mem_limit(adev, size, alloc_flags);
> > > > > > +	if (!bo->kfd_bo->is_imported)
> > > > > > +		unreserve_mem_limit(adev, size, alloc_flags);
> > > > > >     	kfree(bo->kfd_bo);
> > > > > >     }
> > > > > > @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> > > > > >     	}
> > > > > >     }
> > > > > > +static struct drm_gem_object*
> > > > > > +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
> > > > > > +{
> > > > > > +	struct drm_gem_object *gobj;
> > > > > > +	struct amdgpu_bo *abo;
> > > > > > +
> > > > > > +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> > > > > I'd rather remove this limitation. We should be able to use any DMABuf with
> > > > > KFD. This check was added when we basically sidestepped all the dmabuf
> > > > > attachment code and just extracted the amdgpu_bo ourselves. I don't think we
> > > > > want to keep doing that.
> > > > This limitation is here to just reference the gobj if it is an amdgpu
> > > > BO from the same device, instead of creating a new gobj when importing
> > > > a dmabuf.
> > > > 
> > > > > Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
> > > > > import" sent to amd-gfx on March 16. I'm about to send an updated version of
> > > > > this as part of upstream RDMA support soon.
> > > > The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
> > > > memory leak issue. Looking forward to the updated version. Thanks!
> > > Maybe we're trying to fix different problems. Can you give a more detailed
> > > explanation of the memory leak you're seeing? It's not obvious to me.
> > The struct kgd_mem is allocated by "*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);",
> > but it is not assigned to bo->kfd_bo like this "bo->kfd_bo = *mem;". Then *mem will
> > never be freed.
> 
> True. With the current upstream driver there is no way this can happen,
> because we don't have an API for KFD to export a dmabuf in a way that could
> later be imported. But with the RDMA and IPC features we're working on, this
> becomes a real possibility.
> 
> Your solution is to ensure that the dmabuf import always creates a new
> amdgpu_bo. But that can add a lot of overhead. How about this idea: In
> amdgpu_amdkfd_gpuvm_free_memory_of_gpu we could add this after
> drm_gem_object_put:
> 
> 	if (mem->bo->kfd_bo != mem)
> 		kfree(mem);

This approach would turn an imported BO (e.g., a gfx BO) into a KFD BO, i.e.,
assign *mem to bo->kfd_bo. I'm not sure whether it makes sense
to modify the original BO like this.

The overhead is drm_prime_pages_to_sg + dma_map_sgtable when importing a
gfx dmabuf from the same device.
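
Concretely, the extra work is roughly the following (a sketch of the
generic prime attach/map flow, with illustrative variable names, not
the exact amdgpu code path):

	/* a fresh import rebuilds and DMA-maps an sg table for pages
	 * the driver already knows about locally
	 */
	sgt = drm_prime_pages_to_sg(obj->dev, pages, npages);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);
	ret = dma_map_sgtable(attach->dev, sgt, DMA_BIDIRECTIONAL, 0);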

Regards,
Lang

> That way amdgpu_amdkfd_release_notify would only be responsible for freeing
> the original kgd_mem. Any imports will be freed explicitly.
> 
> Regards,
>   Felix
> 
> 
> > 
> > Regards,
> > Lang
> > 
> > > The problem I'm trying to solve is to ensure that each exported BO only has
> > > a single dmabuf because we run into problems with GEM if we have multiple
> > > dmabufs pointing to the same GEM object. That also enables re-exporting
> > > dmabufs of imported BOs. At the same time I'm removing any limitations of
> > > the types of BOs we can import, and trying to eliminate any custom dmabuf
> > > handling in KFD.
> > > 
> > > Regards,
> > >    Felix
> > > 
> > > 
> > > > Regards,
> > > > Lang
> > > > 
> > > > > Regards,
> > > > >     Felix
> > > > > 
> > > > > 
> > > > > > +		gobj = dma_buf->priv;
> > > > > > +		abo = gem_to_amdgpu_bo(gobj);
> > > > > > +		if (gobj->dev == dev && abo->kfd_bo) {
> > > > > > +			drm_gem_object_get(gobj);
> > > > > > +			return gobj;
> > > > > > +		}
> > > > > > +	}
> > > > > > +
> > > > > > +	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
> > > > > > +}
> > > > > > +
> > > > > >     static int
> > > > > >     kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> > > > > >     		      struct amdgpu_bo **bo)
> > > > > > @@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> > > > > >     		}
> > > > > >     	}
> > > > > > -	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
> > > > > > +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
> > > > > >     	if (IS_ERR(gobj))
> > > > > >     		return PTR_ERR(gobj);
> > > > > > @@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > > > >     {
> > > > > >     	struct amdkfd_process_info *process_info = mem->process_info;
> > > > > >     	unsigned long bo_size = mem->bo->tbo.base.size;
> > > > > > +	bool is_imported = false;
> > > > > > +	struct drm_gem_object *imported_gobj;
> > > > > >     	struct kfd_mem_attachment *entry, *tmp;
> > > > > >     	struct bo_vm_reservation_context ctx;
> > > > > >     	struct ttm_validate_buffer *bo_list_entry;
> > > > > >     	unsigned int mapped_to_gpu_memory;
> > > > > >     	int ret;
> > > > > > -	bool is_imported = false;
> > > > > >     	mutex_lock(&mem->lock);
> > > > > > @@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > > > >     	}
> > > > > >     	/* Free the BO*/
> > > > > > -	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> > > > > > +	if (!is_imported) {
> > > > > > +		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> > > > > > +	} else {
> > > > > > +		imported_gobj = mem->dmabuf->priv;
> > > > > > +		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> > > > > > +	}
> > > > > > +
> > > > > >     	if (mem->dmabuf)
> > > > > >     		dma_buf_put(mem->dmabuf);
> > > > > >     	mutex_destroy(&mem->lock);
> > > > > > @@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
> > > > > >     				      uint64_t *mmap_offset)
> > > > > >     {
> > > > > >     	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> > > > > > -	struct drm_gem_object *obj;
> > > > > > -	struct amdgpu_bo *bo;
> > > > > > +	struct drm_gem_object *imported_gobj, *gobj;
> > > > > > +	struct amdgpu_bo *imported_bo, *bo;
> > > > > >     	int ret;
> > > > > > -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
> > > > > > -		/* Can't handle non-graphics buffers */
> > > > > > +	/*
> > > > > > +	 * Can't handle non-graphics buffers and
> > > > > > +	 * buffers from other devices
> > > > > > +	 */
> > > > > > +	imported_gobj = dma_buf->priv;
> > > > > > +	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
> > > > > > +	    drm_to_adev(imported_gobj->dev) != adev)
> > > > > >     		return -EINVAL;
> > > > > > -	obj = dma_buf->priv;
> > > > > > -	if (drm_to_adev(obj->dev) != adev)
> > > > > > -		/* Can't handle buffers from other devices */
> > > > > > +	/* Only VRAM and GTT BOs are supported */
> > > > > > +	imported_bo = gem_to_amdgpu_bo(imported_gobj);
> > > > > > +	if (!(imported_bo->preferred_domains &
> > > > > > +	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
> > > > > >     		return -EINVAL;
> > > > > > -	bo = gem_to_amdgpu_bo(obj);
> > > > > > -	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
> > > > > > -				    AMDGPU_GEM_DOMAIN_GTT)))
> > > > > > -		/* Only VRAM and GTT BOs are supported */
> > > > > > -		return -EINVAL;
> > > > > > +	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
> > > > > > +	if (ret)
> > > > > > +		return ret;
> > > > > > -	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> > > > > > -	if (!*mem)
> > > > > > -		return -ENOMEM;
> > > > > > +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
> > > > > > +	if (IS_ERR(gobj)) {
> > > > > > +		ret = PTR_ERR(gobj);
> > > > > > +		goto err_import;
> > > > > > +	}
> > > > > > -	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
> > > > > > -	if (ret) {
> > > > > > -		kfree(mem);
> > > > > > -		return ret;
> > > > > > +	bo = gem_to_amdgpu_bo(gobj);
> > > > > > +	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
> > > > > > +
> > > > > > +	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> > > > > > +	if (!*mem) {
> > > > > > +		ret = -ENOMEM;
> > > > > > +		goto err_alloc_mem;
> > > > > >     	}
> > > > > >     	if (size)
> > > > > > -		*size = amdgpu_bo_size(bo);
> > > > > > +		*size = amdgpu_bo_size(imported_bo);
> > > > > >     	if (mmap_offset)
> > > > > > -		*mmap_offset = amdgpu_bo_mmap_offset(bo);
> > > > > > +		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
> > > > > >     	INIT_LIST_HEAD(&(*mem)->attachments);
> > > > > >     	mutex_init(&(*mem)->lock);
> > > > > >     	(*mem)->alloc_flags =
> > > > > > -		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > > > > > +		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > > > > >     		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
> > > > > >     		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
> > > > > >     		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> > > > > > -	drm_gem_object_get(&bo->tbo.base);
> > > > > > +	get_dma_buf(dma_buf);
> > > > > > +	(*mem)->dmabuf = dma_buf;
> > > > > >     	(*mem)->bo = bo;
> > > > > >     	(*mem)->va = va;
> > > > > > -	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > > > > > -		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
> > > > > > +	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
> > > > > >     	(*mem)->mapped_to_gpu_memory = 0;
> > > > > >     	(*mem)->process_info = avm->process_info;
> > > > > >     	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
> > > > > >     	amdgpu_sync_create(&(*mem)->sync);
> > > > > >     	(*mem)->is_imported = true;
> > > > > > +	bo->kfd_bo = *mem;
> > > > > >     	return 0;
> > > > > > +err_import:
> > > > > > +	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> > > > > > +err_alloc_mem:
> > > > > > +	drm_gem_object_put(gobj);
> > > > > > +	return ret;
> > > > > >     }
> > > > > >     /* Evict a userptr BO by stopping the queues if necessary

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
  2022-07-26  2:18             ` Lang Yu
@ 2022-07-26  3:03               ` Felix Kuehling
  2022-07-26  3:40                 ` Lang Yu
  0 siblings, 1 reply; 13+ messages in thread
From: Felix Kuehling @ 2022-07-26  3:03 UTC (permalink / raw)
  To: Lang Yu; +Cc: Alex Deucher, Huang Rui, Christian Koenig, amd-gfx


On 2022-07-25 at 22:18, Lang Yu wrote:
> On 07/25/ , Felix Kuehling wrote:
>> On 2022-07-25 at 20:40, Lang Yu wrote:
>>> On 07/25/ , Felix Kuehling wrote:
>>>> On 2022-07-25 at 20:15, Lang Yu wrote:
>>>>> On 07/25/ , Felix Kuehling wrote:
>>>>>> On 2022-07-25 at 06:32, Lang Yu wrote:
>>>>>>> We have a memory leak issue in the current implementation, i.e.,
>>>>>>> the allocated struct kgd_mem memory is not handled properly.
>>>>>>>
>>>>>>> The idea is to always create a buffer object when importing
>>>>>>> a gfx BO based dmabuf.
>>>>>>>
>>>>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>>>>> ---
>>>>>>>      .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
>>>>>>>      1 file changed, 70 insertions(+), 29 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>> index b3806ebe5ef7..c1855b72a3f0 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>>> @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>>>>>>>      	u32 alloc_flags = bo->kfd_bo->alloc_flags;
>>>>>>>      	u64 size = amdgpu_bo_size(bo);
>>>>>>> -	unreserve_mem_limit(adev, size, alloc_flags);
>>>>>>> +	if (!bo->kfd_bo->is_imported)
>>>>>>> +		unreserve_mem_limit(adev, size, alloc_flags);
>>>>>>>      	kfree(bo->kfd_bo);
>>>>>>>      }
>>>>>>> @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
>>>>>>>      	}
>>>>>>>      }
>>>>>>> +static struct drm_gem_object*
>>>>>>> +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
>>>>>>> +{
>>>>>>> +	struct drm_gem_object *gobj;
>>>>>>> +	struct amdgpu_bo *abo;
>>>>>>> +
>>>>>>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>>> I'd rather remove this limitation. We should be able to use any DMABuf with
>>>>>> KFD. This check was added when we basically sidestepped all the dmabuf
>>>>>> attachment code and just extracted the amdgpu_bo ourselves. I don't think we
>>>>>> want to keep doing that.
>>>>> The check here is so that we just reference the gobj if it is an amdgpu BO
>>>>> from the same device, instead of creating a new gobj when importing a dmabuf.
>>>>>
>>>>>> Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
>>>>>> import" sent to amd-gfx on March 16. I'm about to send an updated version of
>>>>>> this as part of upstream RDMA support soon.
>>>>> The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
>>>>> memory leak issue. Looking forward to the updated version. Thanks!
>>>> Maybe we're trying to fix different problems. Can you give a more detailed
>>>> explanation of the memory leak you're seeing? It's not obvious to me.
>>> The struct kgd_mem is allocated by "*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);",
>>> but it is not assigned to bo->kfd_bo like this "bo->kfd_bo = *mem;". Then *mem will
>>> never be freed.
>> True. With the current upstream driver there is no way this can happen,
>> because we don't have an API for KFD to export a dmabuf in a way that could
>> later be imported. But with the RDMA and IPC features we're working on, this
>> becomes a real possibility.
>>
>> Your solution is to ensure that the dmabuf import always creates a new
>> amdgpu_bo. But that can add a lot of overhead. How about this idea: In
>> amdgpu_amdkfd_gpuvm_free_memory_of_gpu we could add this after
>> drm_gem_object_put:
>>
>> 	if (mem->bo->kfd_bo != mem)
>> 		kfree(mem);
> > This approach would turn an imported BO (e.g., a gfx BO) into a KFD BO, i.e.,
> assign *mem to bo->kfd_bo. I'm not sure whether it makes sense
> to modify the original BO like this.

No. The point is that it won't. bo->kfd_bo should only be set for BOs 
that were originally allocated with KFD. Any import won't change the 
bo->kfd_bo. So the condition I suggested will be true, and 
amdgpu_amdkfd_gpuvm_free_memory_of_gpu will kfree the kgd_mem structure 
itself. Only the original allocation will use the release notifier to 
free the kgd_mem.
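
As a sketch of the resulting ownership rules (the idea only, not the
final patch):

	/* in amdgpu_amdkfd_gpuvm_free_memory_of_gpu(), after the
	 * drm_gem_object_put(): an import never owns bo->kfd_bo, so
	 * its kgd_mem is freed explicitly here
	 */
	if (mem->bo->kfd_bo != mem)
		kfree(mem);

	/* in amdgpu_amdkfd_release_notify(): frees only the kgd_mem
	 * of the original KFD allocation
	 */
	kfree(bo->kfd_bo);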


>
> The overhead is drm_prime_pages_to_sg + dma_map_sgtable when importing a
> gfx dmabuf from the same device.

Yes. The dma address arrays are pretty significant, because they store 
4KB PTEs. I'd like to avoid duplicating those for imports, if I can.
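
(For scale, assuming 8-byte DMA addresses at 4 KiB granularity: a
re-imported 1 GiB BO would duplicate 1 GiB / 4 KiB * 8 B = 2 MiB of
address array.)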

Regards,
   Felix


>
> Regards,
> Lang
>
>> That way amdgpu_amdkfd_release_notify would only be responsible for freeing
>> the original kgd_mem. Any imports will be freed explicitly.
>>
>> Regards,
>>    Felix
>>
>>
>>> Regards,
>>> Lang
>>>
>>>> The problem I'm trying to solve is to ensure that each exported BO only has
>>>> a single dmabuf because we run into problems with GEM if we have multiple
>>>> dmabufs pointing to the same GEM object. That also enables re-exporting
>>>> dmabufs of imported BOs. At the same time I'm removing any limitations of
>>>> the types of BOs we can import, and trying to eliminate any custom dmabuf
>>>> handling in KFD.
>>>>
>>>> Regards,
>>>>     Felix
>>>>
>>>>
>>>>> Regards,
>>>>> Lang
>>>>>
>>>>>> Regards,
>>>>>>      Felix
>>>>>>
>>>>>>
>>>>>>> +		gobj = dma_buf->priv;
>>>>>>> +		abo = gem_to_amdgpu_bo(gobj);
>>>>>>> +		if (gobj->dev == dev && abo->kfd_bo) {
>>>>>>> +			drm_gem_object_get(gobj);
>>>>>>> +			return gobj;
>>>>>>> +		}
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
>>>>>>> +}
>>>>>>> +
>>>>>>>      static int
>>>>>>>      kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>>>>>      		      struct amdgpu_bo **bo)
>>>>>>> @@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>>>>>      		}
>>>>>>>      	}
>>>>>>> -	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
>>>>>>> +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
>>>>>>>      	if (IS_ERR(gobj))
>>>>>>>      		return PTR_ERR(gobj);
>>>>>>> @@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>>>>      {
>>>>>>>      	struct amdkfd_process_info *process_info = mem->process_info;
>>>>>>>      	unsigned long bo_size = mem->bo->tbo.base.size;
>>>>>>> +	bool is_imported = false;
>>>>>>> +	struct drm_gem_object *imported_gobj;
>>>>>>>      	struct kfd_mem_attachment *entry, *tmp;
>>>>>>>      	struct bo_vm_reservation_context ctx;
>>>>>>>      	struct ttm_validate_buffer *bo_list_entry;
>>>>>>>      	unsigned int mapped_to_gpu_memory;
>>>>>>>      	int ret;
>>>>>>> -	bool is_imported = false;
>>>>>>>      	mutex_lock(&mem->lock);
>>>>>>> @@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>>>>      	}
>>>>>>>      	/* Free the BO*/
>>>>>>> -	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
>>>>>>> +	if (!is_imported) {
>>>>>>> +		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
>>>>>>> +	} else {
>>>>>>> +		imported_gobj = mem->dmabuf->priv;
>>>>>>> +		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
>>>>>>> +	}
>>>>>>> +
>>>>>>>      	if (mem->dmabuf)
>>>>>>>      		dma_buf_put(mem->dmabuf);
>>>>>>>      	mutex_destroy(&mem->lock);
>>>>>>> @@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
>>>>>>>      				      uint64_t *mmap_offset)
>>>>>>>      {
>>>>>>>      	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
>>>>>>> -	struct drm_gem_object *obj;
>>>>>>> -	struct amdgpu_bo *bo;
>>>>>>> +	struct drm_gem_object *imported_gobj, *gobj;
>>>>>>> +	struct amdgpu_bo *imported_bo, *bo;
>>>>>>>      	int ret;
>>>>>>> -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
>>>>>>> -		/* Can't handle non-graphics buffers */
>>>>>>> +	/*
>>>>>>> +	 * Can't handle non-graphics buffers and
>>>>>>> +	 * buffers from other devices
>>>>>>> +	 */
>>>>>>> +	imported_gobj = dma_buf->priv;
>>>>>>> +	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
>>>>>>> +	    drm_to_adev(imported_gobj->dev) != adev)
>>>>>>>      		return -EINVAL;
>>>>>>> -	obj = dma_buf->priv;
>>>>>>> -	if (drm_to_adev(obj->dev) != adev)
>>>>>>> -		/* Can't handle buffers from other devices */
>>>>>>> +	/* Only VRAM and GTT BOs are supported */
>>>>>>> +	imported_bo = gem_to_amdgpu_bo(imported_gobj);
>>>>>>> +	if (!(imported_bo->preferred_domains &
>>>>>>> +	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
>>>>>>>      		return -EINVAL;
>>>>>>> -	bo = gem_to_amdgpu_bo(obj);
>>>>>>> -	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
>>>>>>> -				    AMDGPU_GEM_DOMAIN_GTT)))
>>>>>>> -		/* Only VRAM and GTT BOs are supported */
>>>>>>> -		return -EINVAL;
>>>>>>> +	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
>>>>>>> +	if (ret)
>>>>>>> +		return ret;
>>>>>>> -	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>>>>>> -	if (!*mem)
>>>>>>> -		return -ENOMEM;
>>>>>>> +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
>>>>>>> +	if (IS_ERR(gobj)) {
>>>>>>> +		ret = PTR_ERR(gobj);
>>>>>>> +		goto err_import;
>>>>>>> +	}
>>>>>>> -	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
>>>>>>> -	if (ret) {
>>>>>>> -		kfree(mem);
>>>>>>> -		return ret;
>>>>>>> +	bo = gem_to_amdgpu_bo(gobj);
>>>>>>> +	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
>>>>>>> +
>>>>>>> +	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>>>>>> +	if (!*mem) {
>>>>>>> +		ret = -ENOMEM;
>>>>>>> +		goto err_alloc_mem;
>>>>>>>      	}
>>>>>>>      	if (size)
>>>>>>> -		*size = amdgpu_bo_size(bo);
>>>>>>> +		*size = amdgpu_bo_size(imported_bo);
>>>>>>>      	if (mmap_offset)
>>>>>>> -		*mmap_offset = amdgpu_bo_mmap_offset(bo);
>>>>>>> +		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
>>>>>>>      	INIT_LIST_HEAD(&(*mem)->attachments);
>>>>>>>      	mutex_init(&(*mem)->lock);
>>>>>>>      	(*mem)->alloc_flags =
>>>>>>> -		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>>>>>> +		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>>>>>>      		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
>>>>>>>      		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
>>>>>>>      		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>>>>>>> -	drm_gem_object_get(&bo->tbo.base);
>>>>>>> +	get_dma_buf(dma_buf);
>>>>>>> +	(*mem)->dmabuf = dma_buf;
>>>>>>>      	(*mem)->bo = bo;
>>>>>>>      	(*mem)->va = va;
>>>>>>> -	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>>>>>> -		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
>>>>>>> +	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
>>>>>>>      	(*mem)->mapped_to_gpu_memory = 0;
>>>>>>>      	(*mem)->process_info = avm->process_info;
>>>>>>>      	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
>>>>>>>      	amdgpu_sync_create(&(*mem)->sync);
>>>>>>>      	(*mem)->is_imported = true;
>>>>>>> +	bo->kfd_bo = *mem;
>>>>>>>      	return 0;
>>>>>>> +err_import:
>>>>>>> +	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
>>>>>>> +err_alloc_mem:
>>>>>>> +	drm_gem_object_put(gobj);
>>>>>>> +	return ret;
>>>>>>>      }
>>>>>>>      /* Evict a userptr BO by stopping the queues if necessary

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
  2022-07-26  3:03               ` Felix Kuehling
@ 2022-07-26  3:40                 ` Lang Yu
  0 siblings, 0 replies; 13+ messages in thread
From: Lang Yu @ 2022-07-26  3:40 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: Alex Deucher, Huang Rui, Christian Koenig, amd-gfx

On 07/25/ , Felix Kuehling wrote:
> 
> On 2022-07-25 at 22:18, Lang Yu wrote:
> > On 07/25/ , Felix Kuehling wrote:
> > > On 2022-07-25 at 20:40, Lang Yu wrote:
> > > > On 07/25/ , Felix Kuehling wrote:
> > > > > On 2022-07-25 at 20:15, Lang Yu wrote:
> > > > > > On 07/25/ , Felix Kuehling wrote:
> > > > > > > On 2022-07-25 at 06:32, Lang Yu wrote:
> > > > > > > > We have a memory leak issue in the current implementation, i.e.,
> > > > > > > > the allocated struct kgd_mem memory is not handled properly.
> > > > > > > > 
> > > > > > > > The idea is to always create a buffer object when importing
> > > > > > > > a gfx BO based dmabuf.
> > > > > > > > 
> > > > > > > > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > > > > > > > ---
> > > > > > > >      .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
> > > > > > > >      1 file changed, 70 insertions(+), 29 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > > > index b3806ebe5ef7..c1855b72a3f0 100644
> > > > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > > > @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> > > > > > > >      	u32 alloc_flags = bo->kfd_bo->alloc_flags;
> > > > > > > >      	u64 size = amdgpu_bo_size(bo);
> > > > > > > > -	unreserve_mem_limit(adev, size, alloc_flags);
> > > > > > > > +	if (!bo->kfd_bo->is_imported)
> > > > > > > > +		unreserve_mem_limit(adev, size, alloc_flags);
> > > > > > > >      	kfree(bo->kfd_bo);
> > > > > > > >      }
> > > > > > > > @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> > > > > > > >      	}
> > > > > > > >      }
> > > > > > > > +static struct drm_gem_object*
> > > > > > > > +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
> > > > > > > > +{
> > > > > > > > +	struct drm_gem_object *gobj;
> > > > > > > > +	struct amdgpu_bo *abo;
> > > > > > > > +
> > > > > > > > +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> > > > > > > I'd rather remove this limitation. We should be able to use any DMABuf with
> > > > > > > KFD. This check was added when we basically sidestepped all the dmabuf
> > > > > > > attachment code and just extracted the amdgpu_bo ourselves. I don't think we
> > > > > > > want to keep doing that.
> > > > > > The check here is so that we just reference the gobj if it is an amdgpu BO
> > > > > > from the same device, instead of creating a new gobj when importing a dmabuf.
> > > > > > 
> > > > > > > Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
> > > > > > > import" sent to amd-gfx on March 16. I'm about to send an updated version of
> > > > > > > this as part of upstream RDMA support soon.
> > > > > > The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
> > > > > > memory leak issue. Looking forward to the updated version. Thanks!
> > > > > Maybe we're trying to fix different problems. Can you give a more detailed
> > > > > explanation of the memory leak you're seeing? It's not obvious to me.
> > > > The struct kgd_mem is allocated by "*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);",
> > > > but it is not assigned to bo->kfd_bo like this "bo->kfd_bo = *mem;". Then *mem will
> > > > never be freed.
> > > True. With the current upstream driver there is no way this can happen,
> > > because we don't have an API for KFD to export a dmabuf in a way that could
> > > later be imported. But with the RDMA and IPC features we're working on, this
> > > becomes a real possibility.
> > > 
> > > Your solution is to ensure that the dmabuf import always creates a new
> > > amdgpu_bo. But that can add a lot of overhead. How about this idea: In
> > > amdgpu_amdkfd_gpuvm_free_memory_of_gpu we could add this after
> > > drm_gem_object_put:
> > > 
> > > 	if (mem->bo->kfd_bo != mem)
> > > 		kfree(mem);
> > This approach would turn an imported BO (e.g., a gfx BO) into a KFD BO, i.e.,
> > assign *mem to bo->kfd_bo. I'm not sure whether it makes sense
> > to modify the original BO like this.
> 
> No. The point is that it won't. bo->kfd_bo should only be set for BOs that
> were originally allocated with KFD. Any import won't change the bo->kfd_bo.
> So the condition I suggested will be true, and
> amdgpu_amdkfd_gpuvm_free_memory_of_gpu will kfree the kgd_mem structure
> itself. Only the original allocation will use the release notifier to free
> the kgd_mem.

Thanks, got it. That's a good idea!

Regards,
Lang

> > 
> > The overhead is drm_prime_pages_to_sg + dma_map_sgtable when importing a
> > gfx dmabuf from the same device.
> 
> Yes. The dma address arrays are pretty significant, because they store 4KB
> PTEs. I'd like to avoid duplicating those for imports, if I can.
> 
> Regards,
>   Felix
> 
> 
> > 
> > Regards,
> > Lang
> > 
> > > That way amdgpu_amdkfd_release_notify would only be responsible for freeing
> > > the original kgd_mem. Any imports will be freed explicitly.
> > > 
> > > Regards,
> > >    Felix
> > > 
> > > 
> > > > Regards,
> > > > Lang
> > > > 
> > > > > The problem I'm trying to solve is to ensure that each exported BO only has
> > > > > a single dmabuf because we run into problems with GEM if we have multiple
> > > > > dmabufs pointing to the same GEM object. That also enables re-exporting
> > > > > dmabufs of imported BOs. At the same time I'm removing any limitations of
> > > > > the types of BOs we can import, and trying to eliminate any custom dmabuf
> > > > > handling in KFD.
> > > > > 
> > > > > Regards,
> > > > >     Felix
> > > > > 
> > > > > 
> > > > > > Regards,
> > > > > > Lang
> > > > > > 
> > > > > > > Regards,
> > > > > > >      Felix
> > > > > > > 
> > > > > > > 
> > > > > > > > +		gobj = dma_buf->priv;
> > > > > > > > +		abo = gem_to_amdgpu_bo(gobj);
> > > > > > > > +		if (gobj->dev == dev && abo->kfd_bo) {
> > > > > > > > +			drm_gem_object_get(gobj);
> > > > > > > > +			return gobj;
> > > > > > > > +		}
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >      static int
> > > > > > > >      kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> > > > > > > >      		      struct amdgpu_bo **bo)
> > > > > > > > @@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> > > > > > > >      		}
> > > > > > > >      	}
> > > > > > > > -	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
> > > > > > > > +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
> > > > > > > >      	if (IS_ERR(gobj))
> > > > > > > >      		return PTR_ERR(gobj);
> > > > > > > > @@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > > > > > >      {
> > > > > > > >      	struct amdkfd_process_info *process_info = mem->process_info;
> > > > > > > >      	unsigned long bo_size = mem->bo->tbo.base.size;
> > > > > > > > +	bool is_imported = false;
> > > > > > > > +	struct drm_gem_object *imported_gobj;
> > > > > > > >      	struct kfd_mem_attachment *entry, *tmp;
> > > > > > > >      	struct bo_vm_reservation_context ctx;
> > > > > > > >      	struct ttm_validate_buffer *bo_list_entry;
> > > > > > > >      	unsigned int mapped_to_gpu_memory;
> > > > > > > >      	int ret;
> > > > > > > > -	bool is_imported = false;
> > > > > > > >      	mutex_lock(&mem->lock);
> > > > > > > > @@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > > > > > >      	}
> > > > > > > >      	/* Free the BO*/
> > > > > > > > -	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> > > > > > > > +	if (!is_imported) {
> > > > > > > > +		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> > > > > > > > +	} else {
> > > > > > > > +		imported_gobj = mem->dmabuf->priv;
> > > > > > > > +		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > >      	if (mem->dmabuf)
> > > > > > > >      		dma_buf_put(mem->dmabuf);
> > > > > > > >      	mutex_destroy(&mem->lock);
> > > > > > > > @@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
> > > > > > > >      				      uint64_t *mmap_offset)
> > > > > > > >      {
> > > > > > > >      	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> > > > > > > > -	struct drm_gem_object *obj;
> > > > > > > > -	struct amdgpu_bo *bo;
> > > > > > > > +	struct drm_gem_object *imported_gobj, *gobj;
> > > > > > > > +	struct amdgpu_bo *imported_bo, *bo;
> > > > > > > >      	int ret;
> > > > > > > > -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
> > > > > > > > -		/* Can't handle non-graphics buffers */
> > > > > > > > +	/*
> > > > > > > > +	 * Can't handle non-graphics buffers and
> > > > > > > > +	 * buffers from other devices
> > > > > > > > +	 */
> > > > > > > > +	imported_gobj = dma_buf->priv;
> > > > > > > > +	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
> > > > > > > > +	    drm_to_adev(imported_gobj->dev) != adev)
> > > > > > > >      		return -EINVAL;
> > > > > > > > -	obj = dma_buf->priv;
> > > > > > > > -	if (drm_to_adev(obj->dev) != adev)
> > > > > > > > -		/* Can't handle buffers from other devices */
> > > > > > > > +	/* Only VRAM and GTT BOs are supported */
> > > > > > > > +	imported_bo = gem_to_amdgpu_bo(imported_gobj);
> > > > > > > > +	if (!(imported_bo->preferred_domains &
> > > > > > > > +	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
> > > > > > > >      		return -EINVAL;
> > > > > > > > -	bo = gem_to_amdgpu_bo(obj);
> > > > > > > > -	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
> > > > > > > > -				    AMDGPU_GEM_DOMAIN_GTT)))
> > > > > > > > -		/* Only VRAM and GTT BOs are supported */
> > > > > > > > -		return -EINVAL;
> > > > > > > > +	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
> > > > > > > > +	if (ret)
> > > > > > > > +		return ret;
> > > > > > > > -	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> > > > > > > > -	if (!*mem)
> > > > > > > > -		return -ENOMEM;
> > > > > > > > +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
> > > > > > > > +	if (IS_ERR(gobj)) {
> > > > > > > > +		ret = PTR_ERR(gobj);
> > > > > > > > +		goto err_import;
> > > > > > > > +	}
> > > > > > > > -	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
> > > > > > > > -	if (ret) {
> > > > > > > > -		kfree(mem);
> > > > > > > > -		return ret;
> > > > > > > > +	bo = gem_to_amdgpu_bo(gobj);
> > > > > > > > +	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
> > > > > > > > +
> > > > > > > > +	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> > > > > > > > +	if (!*mem) {
> > > > > > > > +		ret = -ENOMEM;
> > > > > > > > +		goto err_alloc_mem;
> > > > > > > >      	}
> > > > > > > >      	if (size)
> > > > > > > > -		*size = amdgpu_bo_size(bo);
> > > > > > > > +		*size = amdgpu_bo_size(imported_bo);
> > > > > > > >      	if (mmap_offset)
> > > > > > > > -		*mmap_offset = amdgpu_bo_mmap_offset(bo);
> > > > > > > > +		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
> > > > > > > >      	INIT_LIST_HEAD(&(*mem)->attachments);
> > > > > > > >      	mutex_init(&(*mem)->lock);
> > > > > > > >      	(*mem)->alloc_flags =
> > > > > > > > -		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > > > > > > > +		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > > > > > > >      		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
> > > > > > > >      		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
> > > > > > > >      		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> > > > > > > > -	drm_gem_object_get(&bo->tbo.base);
> > > > > > > > +	get_dma_buf(dma_buf);
> > > > > > > > +	(*mem)->dmabuf = dma_buf;
> > > > > > > >      	(*mem)->bo = bo;
> > > > > > > >      	(*mem)->va = va;
> > > > > > > > -	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> > > > > > > > -		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
> > > > > > > > +	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
> > > > > > > >      	(*mem)->mapped_to_gpu_memory = 0;
> > > > > > > >      	(*mem)->process_info = avm->process_info;
> > > > > > > >      	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
> > > > > > > >      	amdgpu_sync_create(&(*mem)->sync);
> > > > > > > >      	(*mem)->is_imported = true;
> > > > > > > > +	bo->kfd_bo = *mem;
> > > > > > > >      	return 0;
> > > > > > > > +err_import:
> > > > > > > > +	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> > > > > > > > +err_alloc_mem:
> > > > > > > > +	drm_gem_object_put(gobj);
> > > > > > > > +	return ret;
> > > > > > > >      }
> > > > > > > >      /* Evict a userptr BO by stopping the queues if necessary

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH 3/3] drm/amdkfd: remove an unnecessary amdgpu_bo_ref
  2022-07-25 10:32 ` [PATCH 3/3] drm/amdkfd: remove an unnecessary amdgpu_bo_ref Lang Yu
@ 2022-07-28  2:33   ` Yu, Lang
  2022-07-28  2:45   ` Felix Kuehling
  1 sibling, 0 replies; 13+ messages in thread
From: Yu, Lang @ 2022-07-28  2:33 UTC (permalink / raw)
  To: amd-gfx
  Cc: Deucher, Alexander, Kuehling, Felix, Huang, Ray, Koenig, Christian

[Public]

Ping for this single patch. 

>-----Original Message-----
>From: Yu, Lang <Lang.Yu@amd.com>
>Sent: Monday, July 25, 2022 6:32 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Koenig, Christian
><Christian.Koenig@amd.com>; Deucher, Alexander
><Alexander.Deucher@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Yu, Lang
><Lang.Yu@amd.com>
>Subject: [PATCH 3/3] drm/amdkfd: remove an unnecessary amdgpu_bo_ref
>
>No need to reference the BO here; the dmabuf framework will handle that.
>
>Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 -
> 1 file changed, 1 deletion(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>index c1855b72a3f0..802c762108b2 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>@@ -827,7 +827,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev,
>struct kgd_mem *mem,
>
> 	*bo = gem_to_amdgpu_bo(gobj);
> 	(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
>-	(*bo)->parent = amdgpu_bo_ref(mem->bo);
>
> 	return 0;
> }
>--
>2.25.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdkfd: remove an unnecessary amdgpu_bo_ref
  2022-07-25 10:32 ` [PATCH 3/3] drm/amdkfd: remove an unnecessary amdgpu_bo_ref Lang Yu
  2022-07-28  2:33   ` Yu, Lang
@ 2022-07-28  2:45   ` Felix Kuehling
  1 sibling, 0 replies; 13+ messages in thread
From: Felix Kuehling @ 2022-07-28  2:45 UTC (permalink / raw)
  To: Lang Yu, amd-gfx; +Cc: Alex Deucher, Huang Rui, Christian Koenig


On 2022-07-25 at 06:32, Lang Yu wrote:
> No need to reference the BO here; the dmabuf framework will handle that.

OK. I guess I needed to do that manually for the userptr attachment, and 
copy/pasted it unnecessarily for the dmabuf attachment.
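
As a sketch of why the extra reference is redundant (generic dma-buf
lifetime rules, illustrative only):

	get_dma_buf(dma_buf);		/* the import path already holds a
					 * reference on the dma-buf
					 */
	...
	dma_buf_put(mem->dmabuf);	/* dropped on free; the dma-buf in turn
					 * keeps the exporting BO alive, so no
					 * extra amdgpu_bo_ref() is needed
					 */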

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 -
>   1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index c1855b72a3f0..802c762108b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -827,7 +827,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>   
>   	*bo = gem_to_amdgpu_bo(gobj);
>   	(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
> -	(*bo)->parent = amdgpu_bo_ref(mem->bo);
>   
>   	return 0;
>   }

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-07-28  2:45 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-25 10:32 [PATCH 1/3] drm/amdgpu: extract a helper to import dmabuf more flexibly Lang Yu
2022-07-25 10:32 ` [PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling Lang Yu
2022-07-25 14:20   ` Felix Kuehling
2022-07-26  0:15     ` Lang Yu
2022-07-26  0:32       ` Felix Kuehling
2022-07-26  0:40         ` Lang Yu
2022-07-26  1:48           ` Felix Kuehling
2022-07-26  2:18             ` Lang Yu
2022-07-26  3:03               ` Felix Kuehling
2022-07-26  3:40                 ` Lang Yu
2022-07-25 10:32 ` [PATCH 3/3] drm/amdkfd: remove an unnecessary amdgpu_bo_ref Lang Yu
2022-07-28  2:33   ` Yu, Lang
2022-07-28  2:45   ` Felix Kuehling
