All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats
@ 2021-06-22 21:26 ` Mikel Rychliski
  0 siblings, 0 replies; 11+ messages in thread
From: Mikel Rychliski @ 2021-06-22 21:26 UTC (permalink / raw)
  To: Alex Deucher, Christian König, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel
  Cc: Mikel Rychliski

radeon_ttm_bo_destroy() is attempting to access the resource object to
update memory counters. However, the resource object is already freed when
ttm calls this function via the destroy callback. This causes an oops when
a bo is freed:

	BUG: kernel NULL pointer dereference, address: 0000000000000010
	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
	Call Trace:
	 radeon_bo_unref+0x1a/0x30 [radeon]
	 radeon_gem_object_free+0x33/0x50 [radeon]
	 drm_gem_object_release_handle+0x69/0x70 [drm]
	 drm_gem_handle_delete+0x62/0xa0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 drm_ioctl_kernel+0xb2/0xf0 [drm]
	 drm_ioctl+0x30a/0x3c0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 radeon_drm_ioctl+0x49/0x80 [radeon]
	 __x64_sys_ioctl+0x8e/0xd0

Avoid the issue by updating the counters in the delete_mem_notify callback
instead. Also, fix memory statistic updating in radeon_bo_move() to
identify the source type correctly. The source type needs to be saved
before the move, because the moved from object may be altered by the move.

Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
---

v2: Update statistics on ghost object destroy

 drivers/gpu/drm/radeon/radeon_object.c | 33 ++++++++-------------------------
 drivers/gpu/drm/radeon/radeon_object.h |  7 ++++---
 drivers/gpu/drm/radeon/radeon_ttm.c    | 20 +++++++++++++++++---
 3 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index bfaaa3c969a3..e0f98b394acd 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
  * function are calling it.
  */
 
-static void radeon_update_memory_usage(struct radeon_bo *bo,
-				       unsigned mem_type, int sign)
+void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+				unsigned int mem_type, int sign)
 {
-	struct radeon_device *rdev = bo->rdev;
+	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
 
 	switch (mem_type) {
 	case TTM_PL_TT:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_add(bo->base.size, &rdev->gtt_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_sub(bo->base.size, &rdev->gtt_usage);
 		break;
 	case TTM_PL_VRAM:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_add(bo->base.size, &rdev->vram_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_sub(bo->base.size, &rdev->vram_usage);
 		break;
 	}
 }
@@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
 	bo = container_of(tbo, struct radeon_bo, tbo);
 
-	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
-
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
@@ -726,25 +724,10 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 	return radeon_bo_get_surface_reg(bo);
 }
 
-void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem)
+void radeon_bo_move_notify(struct radeon_bo *rbo)
 {
-	struct radeon_bo *rbo;
-
-	if (!radeon_ttm_bo_is_radeon_bo(bo))
-		return;
-
-	rbo = container_of(bo, struct radeon_bo, tbo);
 	radeon_bo_check_tiling(rbo, 0, 1);
 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
-
-	/* update statistics */
-	if (!new_mem)
-		return;
-
-	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
-	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
 }
 
 vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 1739c6a142cd..0be50d28bafa 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -133,6 +133,9 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 	return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
 }
 
+extern void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+				       unsigned int mem_type, int sign);
+
 extern int radeon_bo_create(struct radeon_device *rdev,
 			    unsigned long size, int byte_align,
 			    bool kernel, u32 domain, u32 flags,
@@ -160,9 +163,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 				u32 *tiling_flags, u32 *pitch);
 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop);
-extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-				  bool evict,
-				  struct ttm_resource *new_mem);
+extern void radeon_bo_move_notify(struct radeon_bo *rbo);
 extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
 extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index ad2a5a791bba..1bc0648c5865 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource *old_mem = bo->resource;
 	struct radeon_device *rdev;
 	struct radeon_bo *rbo;
-	int r;
+	int r, old_type;
 
 	if (new_mem->mem_type == TTM_PL_TT) {
 		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
@@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
 		return -EINVAL;
 
+	/* Save old type for statistics update */
+	old_type = old_mem->mem_type;
+
 	rdev = radeon_get_rdev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		ttm_bo_move_null(bo, new_mem);
@@ -261,7 +264,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 out:
 	/* update statistics */
 	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
-	radeon_bo_move_notify(bo, evict, new_mem);
+	radeon_update_memory_usage(bo, old_type, -1);
+	radeon_update_memory_usage(bo, new_mem->mem_type, 1);
+	radeon_bo_move_notify(rbo);
 	return 0;
 }
 
@@ -682,7 +687,16 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
 static void
 radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	radeon_bo_move_notify(bo, false, NULL);
+	struct radeon_bo *rbo;
+
+	if (bo->resource)
+		radeon_update_memory_usage(bo, bo->resource->mem_type, -1);
+
+	if (!radeon_ttm_bo_is_radeon_bo(bo))
+		return;
+
+	rbo = container_of(bo, struct radeon_bo, tbo);
+	radeon_bo_move_notify(rbo);
 }
 
 static struct ttm_device_funcs radeon_bo_driver = {
-- 
2.13.7


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats
@ 2021-06-22 21:26 ` Mikel Rychliski
  0 siblings, 0 replies; 11+ messages in thread
From: Mikel Rychliski @ 2021-06-22 21:26 UTC (permalink / raw)
  To: Alex Deucher, Christian König, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel
  Cc: Mikel Rychliski

radeon_ttm_bo_destroy() is attempting to access the resource object to
update memory counters. However, the resource object is already freed when
ttm calls this function via the destroy callback. This causes an oops when
a bo is freed:

	BUG: kernel NULL pointer dereference, address: 0000000000000010
	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
	Call Trace:
	 radeon_bo_unref+0x1a/0x30 [radeon]
	 radeon_gem_object_free+0x33/0x50 [radeon]
	 drm_gem_object_release_handle+0x69/0x70 [drm]
	 drm_gem_handle_delete+0x62/0xa0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 drm_ioctl_kernel+0xb2/0xf0 [drm]
	 drm_ioctl+0x30a/0x3c0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 radeon_drm_ioctl+0x49/0x80 [radeon]
	 __x64_sys_ioctl+0x8e/0xd0

Avoid the issue by updating the counters in the delete_mem_notify callback
instead. Also, fix memory statistic updating in radeon_bo_move() to
identify the source type correctly. The source type needs to be saved
before the move, because the moved from object may be altered by the move.

Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
---

v2: Update statistics on ghost object destroy

 drivers/gpu/drm/radeon/radeon_object.c | 33 ++++++++-------------------------
 drivers/gpu/drm/radeon/radeon_object.h |  7 ++++---
 drivers/gpu/drm/radeon/radeon_ttm.c    | 20 +++++++++++++++++---
 3 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index bfaaa3c969a3..e0f98b394acd 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
  * function are calling it.
  */
 
-static void radeon_update_memory_usage(struct radeon_bo *bo,
-				       unsigned mem_type, int sign)
+void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+				unsigned int mem_type, int sign)
 {
-	struct radeon_device *rdev = bo->rdev;
+	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
 
 	switch (mem_type) {
 	case TTM_PL_TT:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_add(bo->base.size, &rdev->gtt_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_sub(bo->base.size, &rdev->gtt_usage);
 		break;
 	case TTM_PL_VRAM:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_add(bo->base.size, &rdev->vram_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_sub(bo->base.size, &rdev->vram_usage);
 		break;
 	}
 }
@@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
 	bo = container_of(tbo, struct radeon_bo, tbo);
 
-	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
-
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
@@ -726,25 +724,10 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 	return radeon_bo_get_surface_reg(bo);
 }
 
-void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
-			   struct ttm_resource *new_mem)
+void radeon_bo_move_notify(struct radeon_bo *rbo)
 {
-	struct radeon_bo *rbo;
-
-	if (!radeon_ttm_bo_is_radeon_bo(bo))
-		return;
-
-	rbo = container_of(bo, struct radeon_bo, tbo);
 	radeon_bo_check_tiling(rbo, 0, 1);
 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
-
-	/* update statistics */
-	if (!new_mem)
-		return;
-
-	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
-	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
 }
 
 vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 1739c6a142cd..0be50d28bafa 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -133,6 +133,9 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 	return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
 }
 
+extern void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+				       unsigned int mem_type, int sign);
+
 extern int radeon_bo_create(struct radeon_device *rdev,
 			    unsigned long size, int byte_align,
 			    bool kernel, u32 domain, u32 flags,
@@ -160,9 +163,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 				u32 *tiling_flags, u32 *pitch);
 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop);
-extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-				  bool evict,
-				  struct ttm_resource *new_mem);
+extern void radeon_bo_move_notify(struct radeon_bo *rbo);
 extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
 extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index ad2a5a791bba..1bc0648c5865 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource *old_mem = bo->resource;
 	struct radeon_device *rdev;
 	struct radeon_bo *rbo;
-	int r;
+	int r, old_type;
 
 	if (new_mem->mem_type == TTM_PL_TT) {
 		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
@@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
 		return -EINVAL;
 
+	/* Save old type for statistics update */
+	old_type = old_mem->mem_type;
+
 	rdev = radeon_get_rdev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		ttm_bo_move_null(bo, new_mem);
@@ -261,7 +264,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 out:
 	/* update statistics */
 	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
-	radeon_bo_move_notify(bo, evict, new_mem);
+	radeon_update_memory_usage(bo, old_type, -1);
+	radeon_update_memory_usage(bo, new_mem->mem_type, 1);
+	radeon_bo_move_notify(rbo);
 	return 0;
 }
 
@@ -682,7 +687,16 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
 static void
 radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	radeon_bo_move_notify(bo, false, NULL);
+	struct radeon_bo *rbo;
+
+	if (bo->resource)
+		radeon_update_memory_usage(bo, bo->resource->mem_type, -1);
+
+	if (!radeon_ttm_bo_is_radeon_bo(bo))
+		return;
+
+	rbo = container_of(bo, struct radeon_bo, tbo);
+	radeon_bo_move_notify(rbo);
 }
 
 static struct ttm_device_funcs radeon_bo_driver = {
-- 
2.13.7

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats
  2021-06-22 21:26 ` Mikel Rychliski
@ 2021-06-23  6:55   ` Christian König
  -1 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2021-06-23  6:55 UTC (permalink / raw)
  To: Mikel Rychliski, Alex Deucher, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel

Am 22.06.21 um 23:26 schrieb Mikel Rychliski:
> radeon_ttm_bo_destroy() is attempting to access the resource object to
> update memory counters. However, the resource object is already freed when
> ttm calls this function via the destroy callback. This causes an oops when
> a bo is freed:
>
> 	BUG: kernel NULL pointer dereference, address: 0000000000000010
> 	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
> 	Call Trace:
> 	 radeon_bo_unref+0x1a/0x30 [radeon]
> 	 radeon_gem_object_free+0x33/0x50 [radeon]
> 	 drm_gem_object_release_handle+0x69/0x70 [drm]
> 	 drm_gem_handle_delete+0x62/0xa0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 drm_ioctl_kernel+0xb2/0xf0 [drm]
> 	 drm_ioctl+0x30a/0x3c0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 radeon_drm_ioctl+0x49/0x80 [radeon]
> 	 __x64_sys_ioctl+0x8e/0xd0
>
> Avoid the issue by updating the counters in the delete_mem_notify callback
> instead. Also, fix memory statistic updating in radeon_bo_move() to
> identify the source type correctly. The source type needs to be saved
> before the move, because the moved from object may be altered by the move.
>
> Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
> Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
> ---
>
> v2: Update statistics on ghost object destroy
>
>   drivers/gpu/drm/radeon/radeon_object.c | 33 ++++++++-------------------------
>   drivers/gpu/drm/radeon/radeon_object.h |  7 ++++---
>   drivers/gpu/drm/radeon/radeon_ttm.c    | 20 +++++++++++++++++---
>   3 files changed, 29 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index bfaaa3c969a3..e0f98b394acd 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
>    * function are calling it.
>    */
>   
> -static void radeon_update_memory_usage(struct radeon_bo *bo,
> -				       unsigned mem_type, int sign)
> +void radeon_update_memory_usage(struct ttm_buffer_object *bo,
> +				unsigned int mem_type, int sign)
>   {
> -	struct radeon_device *rdev = bo->rdev;
> +	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
>   
>   	switch (mem_type) {
>   	case TTM_PL_TT:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_add(bo->base.size, &rdev->gtt_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_sub(bo->base.size, &rdev->gtt_usage);
>   		break;
>   	case TTM_PL_VRAM:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_add(bo->base.size, &rdev->vram_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_sub(bo->base.size, &rdev->vram_usage);
>   		break;
>   	}
>   }
> @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
>   
>   	bo = container_of(tbo, struct radeon_bo, tbo);
>   
> -	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
> -
>   	mutex_lock(&bo->rdev->gem.mutex);
>   	list_del_init(&bo->list);
>   	mutex_unlock(&bo->rdev->gem.mutex);
> @@ -726,25 +724,10 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   	return radeon_bo_get_surface_reg(bo);
>   }
>   
> -void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -			   bool evict,
> -			   struct ttm_resource *new_mem)
> +void radeon_bo_move_notify(struct radeon_bo *rbo)

Please rather keep the new resource as parameter here and update before 
adjusting bo->resource.

This way you also don't need to export radeon_update_memory_usage().

Christian.

>   {
> -	struct radeon_bo *rbo;
> -
> -	if (!radeon_ttm_bo_is_radeon_bo(bo))
> -		return;
> -
> -	rbo = container_of(bo, struct radeon_bo, tbo);
>   	radeon_bo_check_tiling(rbo, 0, 1);
>   	radeon_vm_bo_invalidate(rbo->rdev, rbo);
> -
> -	/* update statistics */
> -	if (!new_mem)
> -		return;
> -
> -	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
> -	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
>   }
>   
>   vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 1739c6a142cd..0be50d28bafa 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -133,6 +133,9 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
>   	return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
>   }
>   
> +extern void radeon_update_memory_usage(struct ttm_buffer_object *bo,
> +				       unsigned int mem_type, int sign);
> +
>   extern int radeon_bo_create(struct radeon_device *rdev,
>   			    unsigned long size, int byte_align,
>   			    bool kernel, u32 domain, u32 flags,
> @@ -160,9 +163,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
>   				u32 *tiling_flags, u32 *pitch);
>   extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   				bool force_drop);
> -extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -				  bool evict,
> -				  struct ttm_resource *new_mem);
> +extern void radeon_bo_move_notify(struct radeon_bo *rbo);
>   extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
>   extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index ad2a5a791bba..1bc0648c5865 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	struct ttm_resource *old_mem = bo->resource;
>   	struct radeon_device *rdev;
>   	struct radeon_bo *rbo;
> -	int r;
> +	int r, old_type;
>   
>   	if (new_mem->mem_type == TTM_PL_TT) {
>   		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
> @@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
>   		return -EINVAL;
>   
> +	/* Save old type for statistics update */
> +	old_type = old_mem->mem_type;
> +
>   	rdev = radeon_get_rdev(bo->bdev);
>   	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>   		ttm_bo_move_null(bo, new_mem);
> @@ -261,7 +264,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   out:
>   	/* update statistics */
>   	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
> -	radeon_bo_move_notify(bo, evict, new_mem);
> +	radeon_update_memory_usage(bo, old_type, -1);
> +	radeon_update_memory_usage(bo, new_mem->mem_type, 1);
> +	radeon_bo_move_notify(rbo);
>   	return 0;
>   }
>   
> @@ -682,7 +687,16 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
>   static void
>   radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>   {
> -	radeon_bo_move_notify(bo, false, NULL);
> +	struct radeon_bo *rbo;
> +
> +	if (bo->resource)
> +		radeon_update_memory_usage(bo, bo->resource->mem_type, -1);
> +
> +	if (!radeon_ttm_bo_is_radeon_bo(bo))
> +		return;
> +
> +	rbo = container_of(bo, struct radeon_bo, tbo);
> +	radeon_bo_move_notify(rbo);
>   }
>   
>   static struct ttm_device_funcs radeon_bo_driver = {


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats
@ 2021-06-23  6:55   ` Christian König
  0 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2021-06-23  6:55 UTC (permalink / raw)
  To: Mikel Rychliski, Alex Deucher, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel

Am 22.06.21 um 23:26 schrieb Mikel Rychliski:
> radeon_ttm_bo_destroy() is attempting to access the resource object to
> update memory counters. However, the resource object is already freed when
> ttm calls this function via the destroy callback. This causes an oops when
> a bo is freed:
>
> 	BUG: kernel NULL pointer dereference, address: 0000000000000010
> 	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
> 	Call Trace:
> 	 radeon_bo_unref+0x1a/0x30 [radeon]
> 	 radeon_gem_object_free+0x33/0x50 [radeon]
> 	 drm_gem_object_release_handle+0x69/0x70 [drm]
> 	 drm_gem_handle_delete+0x62/0xa0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 drm_ioctl_kernel+0xb2/0xf0 [drm]
> 	 drm_ioctl+0x30a/0x3c0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 radeon_drm_ioctl+0x49/0x80 [radeon]
> 	 __x64_sys_ioctl+0x8e/0xd0
>
> Avoid the issue by updating the counters in the delete_mem_notify callback
> instead. Also, fix memory statistic updating in radeon_bo_move() to
> identify the source type correctly. The source type needs to be saved
> before the move, because the moved from object may be altered by the move.
>
> Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
> Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
> ---
>
> v2: Update statistics on ghost object destroy
>
>   drivers/gpu/drm/radeon/radeon_object.c | 33 ++++++++-------------------------
>   drivers/gpu/drm/radeon/radeon_object.h |  7 ++++---
>   drivers/gpu/drm/radeon/radeon_ttm.c    | 20 +++++++++++++++++---
>   3 files changed, 29 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index bfaaa3c969a3..e0f98b394acd 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
>    * function are calling it.
>    */
>   
> -static void radeon_update_memory_usage(struct radeon_bo *bo,
> -				       unsigned mem_type, int sign)
> +void radeon_update_memory_usage(struct ttm_buffer_object *bo,
> +				unsigned int mem_type, int sign)
>   {
> -	struct radeon_device *rdev = bo->rdev;
> +	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
>   
>   	switch (mem_type) {
>   	case TTM_PL_TT:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_add(bo->base.size, &rdev->gtt_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_sub(bo->base.size, &rdev->gtt_usage);
>   		break;
>   	case TTM_PL_VRAM:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_add(bo->base.size, &rdev->vram_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_sub(bo->base.size, &rdev->vram_usage);
>   		break;
>   	}
>   }
> @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
>   
>   	bo = container_of(tbo, struct radeon_bo, tbo);
>   
> -	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
> -
>   	mutex_lock(&bo->rdev->gem.mutex);
>   	list_del_init(&bo->list);
>   	mutex_unlock(&bo->rdev->gem.mutex);
> @@ -726,25 +724,10 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   	return radeon_bo_get_surface_reg(bo);
>   }
>   
> -void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -			   bool evict,
> -			   struct ttm_resource *new_mem)
> +void radeon_bo_move_notify(struct radeon_bo *rbo)

Please rather keep the new resource as parameter here and update before 
adjusting bo->resource.

This way you also don't need to export radeon_update_memory_usage().

Christian.

>   {
> -	struct radeon_bo *rbo;
> -
> -	if (!radeon_ttm_bo_is_radeon_bo(bo))
> -		return;
> -
> -	rbo = container_of(bo, struct radeon_bo, tbo);
>   	radeon_bo_check_tiling(rbo, 0, 1);
>   	radeon_vm_bo_invalidate(rbo->rdev, rbo);
> -
> -	/* update statistics */
> -	if (!new_mem)
> -		return;
> -
> -	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
> -	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
>   }
>   
>   vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 1739c6a142cd..0be50d28bafa 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -133,6 +133,9 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
>   	return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
>   }
>   
> +extern void radeon_update_memory_usage(struct ttm_buffer_object *bo,
> +				       unsigned int mem_type, int sign);
> +
>   extern int radeon_bo_create(struct radeon_device *rdev,
>   			    unsigned long size, int byte_align,
>   			    bool kernel, u32 domain, u32 flags,
> @@ -160,9 +163,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
>   				u32 *tiling_flags, u32 *pitch);
>   extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   				bool force_drop);
> -extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -				  bool evict,
> -				  struct ttm_resource *new_mem);
> +extern void radeon_bo_move_notify(struct radeon_bo *rbo);
>   extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
>   extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index ad2a5a791bba..1bc0648c5865 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	struct ttm_resource *old_mem = bo->resource;
>   	struct radeon_device *rdev;
>   	struct radeon_bo *rbo;
> -	int r;
> +	int r, old_type;
>   
>   	if (new_mem->mem_type == TTM_PL_TT) {
>   		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
> @@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
>   		return -EINVAL;
>   
> +	/* Save old type for statistics update */
> +	old_type = old_mem->mem_type;
> +
>   	rdev = radeon_get_rdev(bo->bdev);
>   	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>   		ttm_bo_move_null(bo, new_mem);
> @@ -261,7 +264,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   out:
>   	/* update statistics */
>   	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
> -	radeon_bo_move_notify(bo, evict, new_mem);
> +	radeon_update_memory_usage(bo, old_type, -1);
> +	radeon_update_memory_usage(bo, new_mem->mem_type, 1);
> +	radeon_bo_move_notify(rbo);
>   	return 0;
>   }
>   
> @@ -682,7 +687,16 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
>   static void
>   radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>   {
> -	radeon_bo_move_notify(bo, false, NULL);
> +	struct radeon_bo *rbo;
> +
> +	if (bo->resource)
> +		radeon_update_memory_usage(bo, bo->resource->mem_type, -1);
> +
> +	if (!radeon_ttm_bo_is_radeon_bo(bo))
> +		return;
> +
> +	rbo = container_of(bo, struct radeon_bo, tbo);
> +	radeon_bo_move_notify(rbo);
>   }
>   
>   static struct ttm_device_funcs radeon_bo_driver = {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v3] drm/radeon: Fix NULL dereference when updating memory stats
  2021-06-23  6:55   ` Christian König
@ 2021-06-24  4:51     ` Mikel Rychliski
  -1 siblings, 0 replies; 11+ messages in thread
From: Mikel Rychliski @ 2021-06-24  4:51 UTC (permalink / raw)
  To: Alex Deucher, Christian König, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel
  Cc: Mikel Rychliski

radeon_ttm_bo_destroy() is attempting to access the resource object to
update memory counters. However, the resource object is already freed when
ttm calls this function via the destroy callback. This causes an oops when
a bo is freed:

	BUG: kernel NULL pointer dereference, address: 0000000000000010
	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
	Call Trace:
	 radeon_bo_unref+0x1a/0x30 [radeon]
	 radeon_gem_object_free+0x33/0x50 [radeon]
	 drm_gem_object_release_handle+0x69/0x70 [drm]
	 drm_gem_handle_delete+0x62/0xa0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 drm_ioctl_kernel+0xb2/0xf0 [drm]
	 drm_ioctl+0x30a/0x3c0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 radeon_drm_ioctl+0x49/0x80 [radeon]
	 __x64_sys_ioctl+0x8e/0xd0

Avoid the issue by updating the counters in the delete_mem_notify callback
instead. Also, fix memory statistic updating in radeon_bo_move() to
identify the source type correctly. The source type needs to be saved
before the move, because the moved from object may be altered by the move.

Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 29 ++++++++++++-----------------
 drivers/gpu/drm/radeon/radeon_object.h |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c    | 13 ++++++++++---
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index bfaaa3c969a3..56ede9d63b12 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
  * function are calling it.
  */
 
-static void radeon_update_memory_usage(struct radeon_bo *bo,
-				       unsigned mem_type, int sign)
+static void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+				       unsigned int mem_type, int sign)
 {
-	struct radeon_device *rdev = bo->rdev;
+	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
 
 	switch (mem_type) {
 	case TTM_PL_TT:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_add(bo->base.size, &rdev->gtt_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_sub(bo->base.size, &rdev->gtt_usage);
 		break;
 	case TTM_PL_VRAM:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_add(bo->base.size, &rdev->vram_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_sub(bo->base.size, &rdev->vram_usage);
 		break;
 	}
 }
@@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
 	bo = container_of(tbo, struct radeon_bo, tbo);
 
-	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
-
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
@@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 }
 
 void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
+			   unsigned int old_type,
 			   struct ttm_resource *new_mem)
 {
 	struct radeon_bo *rbo;
 
+	radeon_update_memory_usage(bo, old_type, -1);
+	if (new_mem)
+		radeon_update_memory_usage(bo, new_mem->mem_type, 1);
+
 	if (!radeon_ttm_bo_is_radeon_bo(bo))
 		return;
 
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	radeon_bo_check_tiling(rbo, 0, 1);
 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
-
-	/* update statistics */
-	if (!new_mem)
-		return;
-
-	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
-	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
 }
 
 vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 1739c6a142cd..1afc7992ef91 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-				  bool evict,
+				  unsigned int old_type,
 				  struct ttm_resource *new_mem);
 extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index ad2a5a791bba..a06d4cc2fb1c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource *old_mem = bo->resource;
 	struct radeon_device *rdev;
 	struct radeon_bo *rbo;
-	int r;
+	int r, old_type;
 
 	if (new_mem->mem_type == TTM_PL_TT) {
 		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
@@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
 		return -EINVAL;
 
+	/* Save old type for statistics update */
+	old_type = old_mem->mem_type;
+
 	rdev = radeon_get_rdev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		ttm_bo_move_null(bo, new_mem);
@@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 out:
 	/* update statistics */
 	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
-	radeon_bo_move_notify(bo, evict, new_mem);
+	radeon_bo_move_notify(bo, old_type, new_mem);
 	return 0;
 }
 
@@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
 static void
 radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	radeon_bo_move_notify(bo, false, NULL);
+	unsigned int old_type = TTM_PL_SYSTEM;
+
+	if (bo->resource)
+		old_type = bo->resource->mem_type;
+	radeon_bo_move_notify(bo, old_type, NULL);
 }
 
 static struct ttm_device_funcs radeon_bo_driver = {
-- 
2.13.7


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3] drm/radeon: Fix NULL dereference when updating memory stats
@ 2021-06-24  4:51     ` Mikel Rychliski
  0 siblings, 0 replies; 11+ messages in thread
From: Mikel Rychliski @ 2021-06-24  4:51 UTC (permalink / raw)
  To: Alex Deucher, Christian König, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel
  Cc: Mikel Rychliski

radeon_ttm_bo_destroy() is attempting to access the resource object to
update memory counters. However, the resource object is already freed when
ttm calls this function via the destroy callback. This causes an oops when
a bo is freed:

	BUG: kernel NULL pointer dereference, address: 0000000000000010
	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
	Call Trace:
	 radeon_bo_unref+0x1a/0x30 [radeon]
	 radeon_gem_object_free+0x33/0x50 [radeon]
	 drm_gem_object_release_handle+0x69/0x70 [drm]
	 drm_gem_handle_delete+0x62/0xa0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 drm_ioctl_kernel+0xb2/0xf0 [drm]
	 drm_ioctl+0x30a/0x3c0 [drm]
	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
	 radeon_drm_ioctl+0x49/0x80 [radeon]
	 __x64_sys_ioctl+0x8e/0xd0

Avoid the issue by updating the counters in the delete_mem_notify callback
instead. Also, fix memory statistic updating in radeon_bo_move() to
identify the source type correctly. The source type needs to be saved
before the move, because the moved from object may be altered by the move.

Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 29 ++++++++++++-----------------
 drivers/gpu/drm/radeon/radeon_object.h |  2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c    | 13 ++++++++++---
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index bfaaa3c969a3..56ede9d63b12 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
  * function are calling it.
  */
 
-static void radeon_update_memory_usage(struct radeon_bo *bo,
-				       unsigned mem_type, int sign)
+static void radeon_update_memory_usage(struct ttm_buffer_object *bo,
+				       unsigned int mem_type, int sign)
 {
-	struct radeon_device *rdev = bo->rdev;
+	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
 
 	switch (mem_type) {
 	case TTM_PL_TT:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_add(bo->base.size, &rdev->gtt_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
+			atomic64_sub(bo->base.size, &rdev->gtt_usage);
 		break;
 	case TTM_PL_VRAM:
 		if (sign > 0)
-			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_add(bo->base.size, &rdev->vram_usage);
 		else
-			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
+			atomic64_sub(bo->base.size, &rdev->vram_usage);
 		break;
 	}
 }
@@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
 	bo = container_of(tbo, struct radeon_bo, tbo);
 
-	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
-
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
@@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 }
 
 void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-			   bool evict,
+			   unsigned int old_type,
 			   struct ttm_resource *new_mem)
 {
 	struct radeon_bo *rbo;
 
+	radeon_update_memory_usage(bo, old_type, -1);
+	if (new_mem)
+		radeon_update_memory_usage(bo, new_mem->mem_type, 1);
+
 	if (!radeon_ttm_bo_is_radeon_bo(bo))
 		return;
 
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	radeon_bo_check_tiling(rbo, 0, 1);
 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
-
-	/* update statistics */
-	if (!new_mem)
-		return;
-
-	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
-	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
 }
 
 vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 1739c6a142cd..1afc7992ef91 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-				  bool evict,
+				  unsigned int old_type,
 				  struct ttm_resource *new_mem);
 extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index ad2a5a791bba..a06d4cc2fb1c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource *old_mem = bo->resource;
 	struct radeon_device *rdev;
 	struct radeon_bo *rbo;
-	int r;
+	int r, old_type;
 
 	if (new_mem->mem_type == TTM_PL_TT) {
 		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
@@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
 		return -EINVAL;
 
+	/* Save old type for statistics update */
+	old_type = old_mem->mem_type;
+
 	rdev = radeon_get_rdev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		ttm_bo_move_null(bo, new_mem);
@@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 out:
 	/* update statistics */
 	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
-	radeon_bo_move_notify(bo, evict, new_mem);
+	radeon_bo_move_notify(bo, old_type, new_mem);
 	return 0;
 }
 
@@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
 static void
 radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-	radeon_bo_move_notify(bo, false, NULL);
+	unsigned int old_type = TTM_PL_SYSTEM;
+
+	if (bo->resource)
+		old_type = bo->resource->mem_type;
+	radeon_bo_move_notify(bo, old_type, NULL);
 }
 
 static struct ttm_device_funcs radeon_bo_driver = {
-- 
2.13.7

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats
  2021-06-23  6:55   ` Christian König
  (?)
@ 2021-06-24  4:52     ` Mikel Rychliski
  -1 siblings, 0 replies; 11+ messages in thread
From: Mikel Rychliski @ 2021-06-24  4:52 UTC (permalink / raw)
  To: Christian König
  Cc: Alex Deucher, Pan, Xinhui, David Airlie, Daniel Vetter,
	Thomas Hellström, amd-gfx, dri-devel, linux-kernel

On Wednesday, June 23, 2021 2:55:04 AM EDT Christian König wrote:
> Please rather keep the new resource as parameter here and update before
> adjusting bo->resource.
> 
> This way you also don't need to export radeon_update_memory_usage().

I wasn't sure exactly what you intended with the request to "update before
adjusting bo->resource".

Assuming the statistics update is done as part of radeon_bo_move_notify(), I 
believe that function cannot be called any earlier in radeon_bo_move(). If it 
were, the source object would be invalidated before it moved.

So I assume you're suggesting updating the memory usage earlier in 
bo_move_notify (before the early return for ghost objects).

Thanks,
Mikel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats
@ 2021-06-24  4:52     ` Mikel Rychliski
  0 siblings, 0 replies; 11+ messages in thread
From: Mikel Rychliski @ 2021-06-24  4:52 UTC (permalink / raw)
  To: Christian König
  Cc: Thomas Hellström, David Airlie, Pan, Xinhui, linux-kernel,
	amd-gfx, dri-devel, Alex Deucher

On Wednesday, June 23, 2021 2:55:04 AM EDT Christian König wrote:
> Please rather keep the new resource as parameter here and update before
> adjusting bo->resource.
> 
> This way you also don't need to export radeon_update_memory_usage().

I wasn't sure exactly what you intended with the request to "update before
adjusting bo->resource".

Assuming the statistics update is done as part of radeon_bo_move_notify(), I 
believe that function cannot be called any earlier in radeon_bo_move(). If it 
were, the source object would be invalidated before it moved.

So I assume you're suggesting updating the memory usage earlier in 
bo_move_notify (before the early return for ghost objects).

Thanks,
Mikel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats
@ 2021-06-24  4:52     ` Mikel Rychliski
  0 siblings, 0 replies; 11+ messages in thread
From: Mikel Rychliski @ 2021-06-24  4:52 UTC (permalink / raw)
  To: Christian König
  Cc: Thomas Hellström, David Airlie, Pan, Xinhui, linux-kernel,
	amd-gfx, dri-devel, Daniel Vetter, Alex Deucher

On Wednesday, June 23, 2021 2:55:04 AM EDT Christian König wrote:
> Please rather keep the new resource as parameter here and update before
> adjusting bo->resource.
> 
> This way you also don't need to export radeon_update_memory_usage().

I wasn't sure exactly what you intended with the request to "update before
adjusting bo->resource".

Assuming the statistics update is done as part of radeon_bo_move_notify(), I 
believe that function cannot be called any earlier in radeon_bo_move(). If it 
were, the source object would be invalidated before it moved.

So I assume you're suggesting updating the memory usage earlier in 
bo_move_notify (before the early return for ghost objects).

Thanks,
Mikel
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] drm/radeon: Fix NULL dereference when updating memory stats
  2021-06-24  4:51     ` Mikel Rychliski
@ 2021-06-30  9:55       ` Christian König
  -1 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2021-06-30  9:55 UTC (permalink / raw)
  To: Mikel Rychliski, Alex Deucher, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel

Am 24.06.21 um 06:51 schrieb Mikel Rychliski:
> radeon_ttm_bo_destroy() is attempting to access the resource object to
> update memory counters. However, the resource object is already freed when
> ttm calls this function via the destroy callback. This causes an oops when
> a bo is freed:
>
> 	BUG: kernel NULL pointer dereference, address: 0000000000000010
> 	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
> 	Call Trace:
> 	 radeon_bo_unref+0x1a/0x30 [radeon]
> 	 radeon_gem_object_free+0x33/0x50 [radeon]
> 	 drm_gem_object_release_handle+0x69/0x70 [drm]
> 	 drm_gem_handle_delete+0x62/0xa0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 drm_ioctl_kernel+0xb2/0xf0 [drm]
> 	 drm_ioctl+0x30a/0x3c0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 radeon_drm_ioctl+0x49/0x80 [radeon]
> 	 __x64_sys_ioctl+0x8e/0xd0
>
> Avoid the issue by updating the counters in the delete_mem_notify callback
> instead. Also, fix memory statistic updating in radeon_bo_move() to
> identify the source type correctly. The source type needs to be saved
> before the move, because the moved from object may be altered by the move.
>
> Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
> Signed-off-by: Mikel Rychliski <mikel@mikelr.com>

So, back from vacation. I've reviewed and pushed the patch to drm-misc-next.

Thanks for the help,
Christian.

> ---
>   drivers/gpu/drm/radeon/radeon_object.c | 29 ++++++++++++-----------------
>   drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>   drivers/gpu/drm/radeon/radeon_ttm.c    | 13 ++++++++++---
>   3 files changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index bfaaa3c969a3..56ede9d63b12 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
>    * function are calling it.
>    */
>   
> -static void radeon_update_memory_usage(struct radeon_bo *bo,
> -				       unsigned mem_type, int sign)
> +static void radeon_update_memory_usage(struct ttm_buffer_object *bo,
> +				       unsigned int mem_type, int sign)
>   {
> -	struct radeon_device *rdev = bo->rdev;
> +	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
>   
>   	switch (mem_type) {
>   	case TTM_PL_TT:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_add(bo->base.size, &rdev->gtt_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_sub(bo->base.size, &rdev->gtt_usage);
>   		break;
>   	case TTM_PL_VRAM:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_add(bo->base.size, &rdev->vram_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_sub(bo->base.size, &rdev->vram_usage);
>   		break;
>   	}
>   }
> @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
>   
>   	bo = container_of(tbo, struct radeon_bo, tbo);
>   
> -	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
> -
>   	mutex_lock(&bo->rdev->gem.mutex);
>   	list_del_init(&bo->list);
>   	mutex_unlock(&bo->rdev->gem.mutex);
> @@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   }
>   
>   void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -			   bool evict,
> +			   unsigned int old_type,
>   			   struct ttm_resource *new_mem)
>   {
>   	struct radeon_bo *rbo;
>   
> +	radeon_update_memory_usage(bo, old_type, -1);
> +	if (new_mem)
> +		radeon_update_memory_usage(bo, new_mem->mem_type, 1);
> +
>   	if (!radeon_ttm_bo_is_radeon_bo(bo))
>   		return;
>   
>   	rbo = container_of(bo, struct radeon_bo, tbo);
>   	radeon_bo_check_tiling(rbo, 0, 1);
>   	radeon_vm_bo_invalidate(rbo->rdev, rbo);
> -
> -	/* update statistics */
> -	if (!new_mem)
> -		return;
> -
> -	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
> -	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
>   }
>   
>   vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 1739c6a142cd..1afc7992ef91 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
>   extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   				bool force_drop);
>   extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -				  bool evict,
> +				  unsigned int old_type,
>   				  struct ttm_resource *new_mem);
>   extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index ad2a5a791bba..a06d4cc2fb1c 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	struct ttm_resource *old_mem = bo->resource;
>   	struct radeon_device *rdev;
>   	struct radeon_bo *rbo;
> -	int r;
> +	int r, old_type;
>   
>   	if (new_mem->mem_type == TTM_PL_TT) {
>   		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
> @@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
>   		return -EINVAL;
>   
> +	/* Save old type for statistics update */
> +	old_type = old_mem->mem_type;
> +
>   	rdev = radeon_get_rdev(bo->bdev);
>   	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>   		ttm_bo_move_null(bo, new_mem);
> @@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   out:
>   	/* update statistics */
>   	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
> -	radeon_bo_move_notify(bo, evict, new_mem);
> +	radeon_bo_move_notify(bo, old_type, new_mem);
>   	return 0;
>   }
>   
> @@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
>   static void
>   radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>   {
> -	radeon_bo_move_notify(bo, false, NULL);
> +	unsigned int old_type = TTM_PL_SYSTEM;
> +
> +	if (bo->resource)
> +		old_type = bo->resource->mem_type;
> +	radeon_bo_move_notify(bo, old_type, NULL);
>   }
>   
>   static struct ttm_device_funcs radeon_bo_driver = {


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] drm/radeon: Fix NULL dereference when updating memory stats
@ 2021-06-30  9:55       ` Christian König
  0 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2021-06-30  9:55 UTC (permalink / raw)
  To: Mikel Rychliski, Alex Deucher, Pan, Xinhui, David Airlie,
	Daniel Vetter, Thomas Hellström, amd-gfx, dri-devel,
	linux-kernel

Am 24.06.21 um 06:51 schrieb Mikel Rychliski:
> radeon_ttm_bo_destroy() is attempting to access the resource object to
> update memory counters. However, the resource object is already freed when
> ttm calls this function via the destroy callback. This causes an oops when
> a bo is freed:
>
> 	BUG: kernel NULL pointer dereference, address: 0000000000000010
> 	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
> 	Call Trace:
> 	 radeon_bo_unref+0x1a/0x30 [radeon]
> 	 radeon_gem_object_free+0x33/0x50 [radeon]
> 	 drm_gem_object_release_handle+0x69/0x70 [drm]
> 	 drm_gem_handle_delete+0x62/0xa0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 drm_ioctl_kernel+0xb2/0xf0 [drm]
> 	 drm_ioctl+0x30a/0x3c0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 radeon_drm_ioctl+0x49/0x80 [radeon]
> 	 __x64_sys_ioctl+0x8e/0xd0
>
> Avoid the issue by updating the counters in the delete_mem_notify callback
> instead. Also, fix memory statistic updating in radeon_bo_move() to
> identify the source type correctly. The source type needs to be saved
> before the move, because the moved from object may be altered by the move.
>
> Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
> Signed-off-by: Mikel Rychliski <mikel@mikelr.com>

So, back from vacation. I've reviewed and pushed the patch to drm-misc-next.

Thanks for the help,
Christian.

> ---
>   drivers/gpu/drm/radeon/radeon_object.c | 29 ++++++++++++-----------------
>   drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>   drivers/gpu/drm/radeon/radeon_ttm.c    | 13 ++++++++++---
>   3 files changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index bfaaa3c969a3..56ede9d63b12 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
>    * function are calling it.
>    */
>   
> -static void radeon_update_memory_usage(struct radeon_bo *bo,
> -				       unsigned mem_type, int sign)
> +static void radeon_update_memory_usage(struct ttm_buffer_object *bo,
> +				       unsigned int mem_type, int sign)
>   {
> -	struct radeon_device *rdev = bo->rdev;
> +	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
>   
>   	switch (mem_type) {
>   	case TTM_PL_TT:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_add(bo->base.size, &rdev->gtt_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_sub(bo->base.size, &rdev->gtt_usage);
>   		break;
>   	case TTM_PL_VRAM:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_add(bo->base.size, &rdev->vram_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_sub(bo->base.size, &rdev->vram_usage);
>   		break;
>   	}
>   }
> @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
>   
>   	bo = container_of(tbo, struct radeon_bo, tbo);
>   
> -	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
> -
>   	mutex_lock(&bo->rdev->gem.mutex);
>   	list_del_init(&bo->list);
>   	mutex_unlock(&bo->rdev->gem.mutex);
> @@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   }
>   
>   void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -			   bool evict,
> +			   unsigned int old_type,
>   			   struct ttm_resource *new_mem)
>   {
>   	struct radeon_bo *rbo;
>   
> +	radeon_update_memory_usage(bo, old_type, -1);
> +	if (new_mem)
> +		radeon_update_memory_usage(bo, new_mem->mem_type, 1);
> +
>   	if (!radeon_ttm_bo_is_radeon_bo(bo))
>   		return;
>   
>   	rbo = container_of(bo, struct radeon_bo, tbo);
>   	radeon_bo_check_tiling(rbo, 0, 1);
>   	radeon_vm_bo_invalidate(rbo->rdev, rbo);
> -
> -	/* update statistics */
> -	if (!new_mem)
> -		return;
> -
> -	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
> -	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
>   }
>   
>   vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 1739c6a142cd..1afc7992ef91 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
>   extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   				bool force_drop);
>   extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -				  bool evict,
> +				  unsigned int old_type,
>   				  struct ttm_resource *new_mem);
>   extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index ad2a5a791bba..a06d4cc2fb1c 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	struct ttm_resource *old_mem = bo->resource;
>   	struct radeon_device *rdev;
>   	struct radeon_bo *rbo;
> -	int r;
> +	int r, old_type;
>   
>   	if (new_mem->mem_type == TTM_PL_TT) {
>   		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
> @@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
>   		return -EINVAL;
>   
> +	/* Save old type for statistics update */
> +	old_type = old_mem->mem_type;
> +
>   	rdev = radeon_get_rdev(bo->bdev);
>   	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>   		ttm_bo_move_null(bo, new_mem);
> @@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   out:
>   	/* update statistics */
>   	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
> -	radeon_bo_move_notify(bo, evict, new_mem);
> +	radeon_bo_move_notify(bo, old_type, new_mem);
>   	return 0;
>   }
>   
> @@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
>   static void
>   radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>   {
> -	radeon_bo_move_notify(bo, false, NULL);
> +	unsigned int old_type = TTM_PL_SYSTEM;
> +
> +	if (bo->resource)
> +		old_type = bo->resource->mem_type;
> +	radeon_bo_move_notify(bo, old_type, NULL);
>   }
>   
>   static struct ttm_device_funcs radeon_bo_driver = {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-06-30  9:55 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-22 21:26 [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats Mikel Rychliski
2021-06-22 21:26 ` Mikel Rychliski
2021-06-23  6:55 ` Christian König
2021-06-23  6:55   ` Christian König
2021-06-24  4:51   ` [PATCH v3] " Mikel Rychliski
2021-06-24  4:51     ` Mikel Rychliski
2021-06-30  9:55     ` Christian König
2021-06-30  9:55       ` Christian König
2021-06-24  4:52   ` [PATCH v2] " Mikel Rychliski
2021-06-24  4:52     ` Mikel Rychliski
2021-06-24  4:52     ` Mikel Rychliski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.