All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs
@ 2021-11-04 23:05 Felix Kuehling
  2021-11-04 23:36 ` Errabolu, Ramesh
  2021-11-05  7:11 ` Christian König
  0 siblings, 2 replies; 3+ messages in thread
From: Felix Kuehling @ 2021-11-04 23:05 UTC (permalink / raw)
  To: amd-gfx; +Cc: ramesh.errabolu

If a kfd_bo was shared (e.g. a dmabuf export), the original kfd_bo may be
freed when the amdgpu_bo still lives on. Free the kfd_bo struct in the
release_notify callback then the amdgpu_bo is freed.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c       |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4accd584886b..5f658823a637 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -307,7 +307,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
 static inline
@@ -322,7 +322,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 }
 
 static inline
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 }
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 5174762f0b46..94fccf0b47ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -201,7 +201,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	u32 domain = bo->preferred_domains;
@@ -213,6 +213,8 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
 	}
 
 	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+
+	kfree(bo->kfd_bo);
 }
 
 
@@ -1599,9 +1601,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
 	if (mem->dmabuf)
 		dma_buf_put(mem->dmabuf);
-	drm_gem_object_put(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
-	kfree(mem);
+
+	/* If this releases the last reference, it will end up calling
+	 * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+	 * this needs to be the last call here.
+	 */
+	drm_gem_object_put(&mem->bo->tbo.base);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6b25982a9077..156002db24e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1279,7 +1279,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 	abo = ttm_to_amdgpu_bo(bo);
 
 	if (abo->kfd_bo)
-		amdgpu_amdkfd_unreserve_memory_limit(abo);
+		amdgpu_amdkfd_release_notify(abo);
 
 	/* We only remove the fence if the resv has individualized. */
 	WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs
  2021-11-04 23:05 [PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs Felix Kuehling
@ 2021-11-04 23:36 ` Errabolu, Ramesh
  2021-11-05  7:11 ` Christian König
  1 sibling, 0 replies; 3+ messages in thread
From: Errabolu, Ramesh @ 2021-11-04 23:36 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

Reviewed-By: Ramesh Errabolu <Ramesh.Errabolu@amd.com

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling@amd.com> 
Sent: Thursday, November 4, 2021 6:05 PM
To: amd-gfx@lists.freedesktop.org
Cc: Errabolu, Ramesh <Ramesh.Errabolu@amd.com>
Subject: [PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs

If a kfd_bo was shared (e.g. a dmabuf export), the original kfd_bo may be freed when the amdgpu_bo still lives on. Free the kfd_bo struct in the release_notify callback then the amdgpu_bo is freed.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c       |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4accd584886b..5f658823a637 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -307,7 +307,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);  #else  static inline @@ -322,7 +322,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,  }
 
 static inline
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 }
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 5174762f0b46..94fccf0b47ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -201,7 +201,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	u32 domain = bo->preferred_domains;
@@ -213,6 +213,8 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
 	}
 
 	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+
+	kfree(bo->kfd_bo);
 }
 
 
@@ -1599,9 +1601,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
 	if (mem->dmabuf)
 		dma_buf_put(mem->dmabuf);
-	drm_gem_object_put(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
-	kfree(mem);
+
+	/* If this releases the last reference, it will end up calling
+	 * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+	 * this needs to be the last call here.
+	 */
+	drm_gem_object_put(&mem->bo->tbo.base);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6b25982a9077..156002db24e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1279,7 +1279,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 	abo = ttm_to_amdgpu_bo(bo);
 
 	if (abo->kfd_bo)
-		amdgpu_amdkfd_unreserve_memory_limit(abo);
+		amdgpu_amdkfd_release_notify(abo);
 
 	/* We only remove the fence if the resv has individualized. */
 	WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
--
2.32.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs
  2021-11-04 23:05 [PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs Felix Kuehling
  2021-11-04 23:36 ` Errabolu, Ramesh
@ 2021-11-05  7:11 ` Christian König
  1 sibling, 0 replies; 3+ messages in thread
From: Christian König @ 2021-11-05  7:11 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx; +Cc: ramesh.errabolu

Am 05.11.21 um 00:05 schrieb Felix Kuehling:
> If a kfd_bo was shared (e.g. a dmabuf export), the original kfd_bo may be
> freed when the amdgpu_bo still lives on. Free the kfd_bo struct in the
> release_notify callback then the amdgpu_bo is freed.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c       |  2 +-
>   3 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 4accd584886b..5f658823a637 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -307,7 +307,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
>   void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>   void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>   				struct amdgpu_vm *vm);
> -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
> +void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
>   void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
>   #else
>   static inline
> @@ -322,7 +322,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>   }
>   
>   static inline
> -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
> +void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>   {
>   }
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 5174762f0b46..94fccf0b47ad 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -201,7 +201,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
>   	spin_unlock(&kfd_mem_limit.mem_limit_lock);
>   }
>   
> -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
> +void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>   {
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>   	u32 domain = bo->preferred_domains;
> @@ -213,6 +213,8 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
>   	}
>   
>   	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
> +
> +	kfree(bo->kfd_bo);
>   }
>   
>   
> @@ -1599,9 +1601,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
>   	if (mem->dmabuf)
>   		dma_buf_put(mem->dmabuf);
> -	drm_gem_object_put(&mem->bo->tbo.base);
>   	mutex_destroy(&mem->lock);
> -	kfree(mem);
> +
> +	/* If this releases the last reference, it will end up calling
> +	 * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
> +	 * this needs to be the last call here.
> +	 */
> +	drm_gem_object_put(&mem->bo->tbo.base);
>   
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6b25982a9077..156002db24e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1279,7 +1279,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>   	abo = ttm_to_amdgpu_bo(bo);
>   
>   	if (abo->kfd_bo)
> -		amdgpu_amdkfd_unreserve_memory_limit(abo);
> +		amdgpu_amdkfd_release_notify(abo);
>   
>   	/* We only remove the fence if the resv has individualized. */
>   	WARN_ON_ONCE(bo->type == ttm_bo_type_kernel


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-11-05  7:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-04 23:05 [PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs Felix Kuehling
2021-11-04 23:36 ` Errabolu, Ramesh
2021-11-05  7:11 ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.